1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 EXTERN int MatUseSpooles_MPIAIJ(Mat);
15 
16 /*
17   Local utility routine that creates a mapping from the global column
18 number to the local number in the off-diagonal part of the local
19 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
20 a slightly higher hash-table lookup cost; without it, it is not scalable (each
21 processor holds an order-N integer array) but access is fast.
22 */
23 #undef __FUNCT__
24 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
25 int CreateColmap_MPIAIJ_Private(Mat mat)
26 {
27   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
28   int        n = aij->B->n,i,ierr;
29 
30   PetscFunctionBegin;
31 #if defined (PETSC_USE_CTABLE)
32   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
33   for (i=0; i<n; i++){
34     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
35   }
36 #else
37   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
38   PetscLogObjectMemory(mat,mat->N*sizeof(int));
39   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
40   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
41 #endif
42   PetscFunctionReturn(0);
43 }
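
/*
   Illustrative sketch of how the colmap built above is typically consulted to translate
   a global column index into a local index in the off-diagonal block B; it mirrors the
   lookup done in MatSetValues_MPIAIJ() below.  The names aij, gcol and col here are only
   placeholders for this sketch.
*/
#if 0
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
  col--;                        /* table entries are stored shifted by one so 0 can mean "absent" */
#else
  col = aij->colmap[gcol] - 1;  /* array stores local index + 1; 0 means gcol not present in B */
#endif
  if (col < 0) { /* gcol is not (yet) a column of the off-diagonal block B */ }
#endif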
44 
45 #define CHUNKSIZE   15
46 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
47 { \
48  \
49     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
50     rmax = aimax[row]; nrow = ailen[row];  \
51     col1 = col - shift; \
52      \
53     low = 0; high = nrow; \
54     while (high-low > 5) { \
55       t = (low+high)/2; \
56       if (rp[t] > col) high = t; \
57       else             low  = t; \
58     } \
59       for (_i=low; _i<high; _i++) { \
60         if (rp[_i] > col1) break; \
61         if (rp[_i] == col1) { \
62           if (addv == ADD_VALUES) ap[_i] += value;   \
63           else                  ap[_i] = value; \
64           goto a_noinsert; \
65         } \
66       }  \
67       if (nonew == 1) goto a_noinsert; \
68       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
69       if (nrow >= rmax) { \
70         /* there is no extra room in row, therefore enlarge */ \
71         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
72         PetscScalar *new_a; \
73  \
74         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
75  \
76         /* malloc new storage space */ \
77         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
78         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
79         new_j   = (int*)(new_a + new_nz); \
80         new_i   = new_j + new_nz; \
81  \
82         /* copy over old data into new slots */ \
83         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
84         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
85         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
86         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
87         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
88                                                            len*sizeof(int));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
90         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
91                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
92         /* free up old matrix storage */ \
93  \
94         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
95         if (!a->singlemalloc) { \
96            ierr = PetscFree(a->i);CHKERRQ(ierr); \
97            ierr = PetscFree(a->j);CHKERRQ(ierr); \
98         } \
99         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
100         a->singlemalloc = PETSC_TRUE; \
101  \
102         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
103         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
104         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
105         a->maxnz += CHUNKSIZE; \
106         a->reallocs++; \
107       } \
108       N = nrow++ - 1; a->nz++; \
109       /* shift up all the later entries in this row */ \
110       for (ii=N; ii>=_i; ii--) { \
111         rp[ii+1] = rp[ii]; \
112         ap[ii+1] = ap[ii]; \
113       } \
114       rp[_i] = col1;  \
115       ap[_i] = value;  \
116       a_noinsert: ; \
117       ailen[row] = nrow; \
118 }
119 
120 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
121 { \
122  \
123     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
124     rmax = bimax[row]; nrow = bilen[row];  \
125     col1 = col - shift; \
126      \
127     low = 0; high = nrow; \
128     while (high-low > 5) { \
129       t = (low+high)/2; \
130       if (rp[t] > col) high = t; \
131       else             low  = t; \
132     } \
133        for (_i=low; _i<high; _i++) { \
134         if (rp[_i] > col1) break; \
135         if (rp[_i] == col1) { \
136           if (addv == ADD_VALUES) ap[_i] += value;   \
137           else                  ap[_i] = value; \
138           goto b_noinsert; \
139         } \
140       }  \
141       if (nonew == 1) goto b_noinsert; \
142       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
143       if (nrow >= rmax) { \
144         /* there is no extra room in row, therefore enlarge */ \
145         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
146         PetscScalar *new_a; \
147  \
148         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
149  \
150         /* malloc new storage space */ \
151         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
152         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
153         new_j   = (int*)(new_a + new_nz); \
154         new_i   = new_j + new_nz; \
155  \
156         /* copy over old data into new slots */ \
157         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
158         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
159         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
160         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
161         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
162                                                            len*sizeof(int));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
164         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
165                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
166         /* free up old matrix storage */ \
167  \
168         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
169         if (!b->singlemalloc) { \
170           ierr = PetscFree(b->i);CHKERRQ(ierr); \
171           ierr = PetscFree(b->j);CHKERRQ(ierr); \
172         } \
173         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
174         b->singlemalloc = PETSC_TRUE; \
175  \
176         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
177         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
178         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
179         b->maxnz += CHUNKSIZE; \
180         b->reallocs++; \
181       } \
182       N = nrow++ - 1; b->nz++; \
183       /* shift up all the later entries in this row */ \
184       for (ii=N; ii>=_i; ii--) { \
185         rp[ii+1] = rp[ii]; \
186         ap[ii+1] = ap[ii]; \
187       } \
188       rp[_i] = col1;  \
189       ap[_i] = value;  \
190       b_noinsert: ; \
191       bilen[row] = nrow; \
192 }
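
/*
   Illustrative, stand-alone sketch of the technique the two macros above implement:
   a short binary search followed by a sorted insert into one CSR row, with the caller
   responsible for enlarging the row (CHUNKSIZE growth) when it is full.  The function
   and variable names are hypothetical and only meant to make the control flow explicit.
*/
#if 0
static int InsertIntoSortedRow(int *rp,PetscScalar *ap,int *nrow,int rmax,int col,PetscScalar value,InsertMode addv)
{
  int low = 0,high = *nrow,t,i,ii;

  while (high - low > 5) {                 /* narrow the search range by bisection */
    t = (low + high)/2;
    if (rp[t] > col) high = t; else low = t;
  }
  for (i=low; i<high; i++) {               /* finish with a linear scan */
    if (rp[i] > col) break;
    if (rp[i] == col) {                    /* column already present: combine values */
      if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
      return 0;
    }
  }
  if (*nrow >= rmax) return 1;             /* no room left; caller must reallocate the row */
  for (ii=*nrow-1; ii>=i; ii--) {          /* shift later entries up to open a slot */
    rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii];
  }
  rp[i] = col; ap[i] = value; (*nrow)++;
  return 0;
}
#endif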
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "MatSetValues_MPIAIJ"
196 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
197 {
198   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
199   PetscScalar  value;
200   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
201   int          cstart = aij->cstart,cend = aij->cend,row,col;
202   PetscTruth   roworiented = aij->roworiented;
203 
204   /* Some variables required in the macros */
205   Mat          A = aij->A;
206   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
207   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
208   PetscScalar  *aa = a->a;
209   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
210   Mat          B = aij->B;
211   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
212   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
213   PetscScalar  *ba = b->a;
214 
215   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
216   int          nonew = a->nonew,shift = a->indexshift;
217   PetscScalar  *ap;
218 
219   PetscFunctionBegin;
220   for (i=0; i<m; i++) {
221     if (im[i] < 0) continue;
222 #if defined(PETSC_USE_BOPT_g)
223     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
224 #endif
225     if (im[i] >= rstart && im[i] < rend) {
226       row = im[i] - rstart;
227       for (j=0; j<n; j++) {
228         if (in[j] >= cstart && in[j] < cend){
229           col = in[j] - cstart;
230           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
231           if (ignorezeroentries && value == 0.0) continue;
232           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
233           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
234         } else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_BOPT_g)
236         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!aij->colmap) {
241               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined (PETSC_USE_CTABLE)
244             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
245 	    col--;
246 #else
247             col = aij->colmap[in[j]] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
250               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
251               col =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
253               B = aij->B;
254               b = (Mat_SeqAIJ*)B->data;
255               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
256               ba = b->a;
257             }
258           } else col = in[j];
259           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
260           if (ignorezeroentries && value == 0.0) continue;
261           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         }
264       }
265     } else {
266       if (!aij->donotstash) {
267         if (roworiented) {
268           if (ignorezeroentries && v[i*n] == 0.0) continue;
269           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
270         } else {
271           if (ignorezeroentries && v[i] == 0.0) continue;
272           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
273         }
274       }
275     }
276   }
277   PetscFunctionReturn(0);
278 }
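
/*
   Usage sketch for the routine above: each process may set any entries it likes (local
   or off-process; the stash handles the latter) and then assembles.  The matrix sizes
   and values below are purely illustrative.
*/
#if 0
  Mat         A;
  int         row = 0,col = 0,ierr;
  PetscScalar v = 1.0;

  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);   /* may be an off-process entry */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);        /* ships stashed entries to their owners */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
#endif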
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatGetValues_MPIAIJ"
282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
283 {
284   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
285   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
286   int        cstart = aij->cstart,cend = aij->cend,row,col;
287 
288   PetscFunctionBegin;
289   for (i=0; i<m; i++) {
290     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
291     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
292     if (idxm[i] >= rstart && idxm[i] < rend) {
293       row = idxm[i] - rstart;
294       for (j=0; j<n; j++) {
295         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
296         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
297         if (idxn[j] >= cstart && idxn[j] < cend){
298           col = idxn[j] - cstart;
299           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
300         } else {
301           if (!aij->colmap) {
302             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
303           }
304 #if defined (PETSC_USE_CTABLE)
305           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
306           col --;
307 #else
308           col = aij->colmap[idxn[j]] - 1;
309 #endif
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
313           }
314         }
315       }
316     } else {
317       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
318     }
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNCT__
324 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
326 {
327   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
328   int         ierr,nstash,reallocs;
329   InsertMode  addv;
330 
331   PetscFunctionBegin;
332   if (aij->donotstash) {
333     PetscFunctionReturn(0);
334   }
335 
336   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
337   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
338   if (addv == (ADD_VALUES|INSERT_VALUES)) {
339     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
340   }
341   mat->insertmode = addv; /* in case this processor had no cache */
342 
343   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
344   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
345   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
346   PetscFunctionReturn(0);
347 }
348 
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
353 {
354   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
355   int         i,j,rstart,ncols,n,ierr,flg;
356   int         *row,*col,other_disassembled;
357   PetscScalar *val;
358   InsertMode  addv = mat->insertmode;
359 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
360   PetscTruth  flag;
361 #endif
362 
363   PetscFunctionBegin;
364   if (!aij->donotstash) {
365     while (1) {
366       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
367       if (!flg) break;
368 
369       for (i=0; i<n;) {
370         /* Now identify the consecutive vals belonging to the same row */
371         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
372         if (j < n) ncols = j-i;
373         else       ncols = n-i;
374         /* Now assemble all these values with a single function call */
375         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
376         i = j;
377       }
378     }
379     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
380   }
381 
382   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
383   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
384 
385   /* determine if any processor has disassembled; if so we must
386      also disassemble ourselves, in order that we may reassemble. */
387   /*
388      if the nonzero structure of the submatrix B cannot change then we know that
389      no processor disassembled, and thus we can skip this step
390   */
391   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
392     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
393     if (mat->was_assembled && !other_disassembled) {
394       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
395     }
396   }
397 
398   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
399     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
400   }
401   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
402   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
403 
404   if (aij->rowvalues) {
405     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
406     aij->rowvalues = 0;
407   }
408 #if defined(PETSC_HAVE_SUPERLUDIST)
409   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
410   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
411 #endif
412 
413 #if defined(PETSC_HAVE_SPOOLES)
414   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
415   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
416 #endif
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
422 int MatZeroEntries_MPIAIJ(Mat A)
423 {
424   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
425   int        ierr;
426 
427   PetscFunctionBegin;
428   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
429   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatZeroRows_MPIAIJ"
435 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
436 {
437   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
438   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
439   int            *procs,*nprocs,j,idx,nsends,*work,row;
440   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
441   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
442   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
443   MPI_Comm       comm = A->comm;
444   MPI_Request    *send_waits,*recv_waits;
445   MPI_Status     recv_status,*send_status;
446   IS             istmp;
447   PetscTruth     found;
448 
449   PetscFunctionBegin;
450   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
451   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
452 
453   /*  first count number of contributors to each processor */
454   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
455   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
456   procs  = nprocs + size;
457   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
458   for (i=0; i<N; i++) {
459     idx = rows[i];
460     found = PETSC_FALSE;
461     for (j=0; j<size; j++) {
462       if (idx >= owners[j] && idx < owners[j+1]) {
463         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
464       }
465     }
466     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
467   }
468   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
469 
470   /* inform other processors of number of messages and max length*/
471   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
472   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
473   nrecvs = work[size+rank];
474   nmax   = work[rank];
475   ierr   = PetscFree(work);CHKERRQ(ierr);
476 
477   /* post receives:   */
478   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
479   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
480   for (i=0; i<nrecvs; i++) {
481     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
482   }
483 
484   /* do sends:
485       1) starts[i] gives the starting index in svalues for stuff going to
486          the ith processor
487   */
488   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
489   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
490   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
491   starts[0] = 0;
492   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
493   for (i=0; i<N; i++) {
494     svalues[starts[owner[i]]++] = rows[i];
495   }
496   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
497 
498   starts[0] = 0;
499   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
500   count = 0;
501   for (i=0; i<size; i++) {
502     if (procs[i]) {
503       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
504     }
505   }
506   ierr = PetscFree(starts);CHKERRQ(ierr);
507 
508   base = owners[rank];
509 
510   /*  wait on receives */
511   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
512   source = lens + nrecvs;
513   count  = nrecvs; slen = 0;
514   while (count) {
515     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
516     /* unpack receives into our local space */
517     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
518     source[imdex]  = recv_status.MPI_SOURCE;
519     lens[imdex]    = n;
520     slen          += n;
521     count--;
522   }
523   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
524 
525   /* move the data into the send scatter */
526   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
527   count = 0;
528   for (i=0; i<nrecvs; i++) {
529     values = rvalues + i*nmax;
530     for (j=0; j<lens[i]; j++) {
531       lrows[count++] = values[j] - base;
532     }
533   }
534   ierr = PetscFree(rvalues);CHKERRQ(ierr);
535   ierr = PetscFree(lens);CHKERRQ(ierr);
536   ierr = PetscFree(owner);CHKERRQ(ierr);
537   ierr = PetscFree(nprocs);CHKERRQ(ierr);
538 
539   /* actually zap the local rows */
540   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
541   PetscLogObjectParent(A,istmp);
542 
543   /*
544         Zero the required rows. If the "diagonal block" of the matrix
545      is square and the user wishes to set the diagonal, we use separate
546      code so that MatSetValues() is not called for each diagonal entry,
547      which would allocate new memory, causing lots of mallocs and slowing things down.
548 
549        Contributed by: Mathew Knepley
550   */
551   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
552   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
553   if (diag && (l->A->M == l->A->N)) {
554     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
555   } else if (diag) {
556     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
557     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
558       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
559 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
560     }
561     for (i = 0; i < slen; i++) {
562       row  = lrows[i] + rstart;
563       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
564     }
565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
567   } else {
568     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
569   }
570   ierr = ISDestroy(istmp);CHKERRQ(ierr);
571   ierr = PetscFree(lrows);CHKERRQ(ierr);
572 
573   /* wait on sends */
574   if (nsends) {
575     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
576     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
577     ierr = PetscFree(send_status);CHKERRQ(ierr);
578   }
579   ierr = PetscFree(send_waits);CHKERRQ(ierr);
580   ierr = PetscFree(svalues);CHKERRQ(ierr);
581 
582   PetscFunctionReturn(0);
583 }
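
/*
   Usage sketch for the routine above: each process lists the global rows it wants zeroed
   (they need not be locally owned, since the indices are shipped to their owners), and a
   diagonal value may be supplied to keep the matrix nonsingular.  The matrix A and the
   row indices below are only illustrative.
*/
#if 0
  IS          is;
  int         rows[2] = {0,3},ierr;
  PetscScalar diag = 1.0;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);   /* or PETSC_NULL to leave the diagonal entries zeroed */
  ierr = ISDestroy(is);CHKERRQ(ierr);
#endif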
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatMult_MPIAIJ"
587 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
588 {
589   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
590   int        ierr,nt;
591 
592   PetscFunctionBegin;
593   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
594   if (nt != A->n) {
595     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
596   }
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
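
/*
   Note on the routine above: the product is computed as
       y = A_d * x_local + B_o * x_ghost
   where A_d is the "diagonal" block acting on the locally owned entries of x and B_o is
   the "off-diagonal" block acting on the ghost values gathered into a->lvec.  The scatter
   of the ghost values is started before the local multiply, so communication overlaps
   with the computation of A_d * x_local.
*/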
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultAdd_MPIAIJ"
606 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   int        ierr;
610 
611   PetscFunctionBegin;
612   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
613   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
614   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
615   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNCT__
620 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
621 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
622 {
623   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
624   int        ierr;
625 
626   PetscFunctionBegin;
627   /* do nondiagonal part */
628   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
629   /* send it on its way */
630   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   /* do local part */
632   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
633   /* receive remote parts: note this assumes the values are not actually */
634   /* inserted in yy until the next line, which is true for my implementation */
635   /* but may not always be true. */
636   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637   PetscFunctionReturn(0);
638 }
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
643 {
644   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
645   int        ierr;
646 
647   PetscFunctionBegin;
648   /* do nondiagonal part */
649   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
650   /* send it on its way */
651   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   /* do local part */
653   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
654   /* receive remote parts: note this assumes the values are not actually */
655   /* inserted in zz until the next line, which is true for my implementation */
656   /* but may not always be true. */
657   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 /*
662   This only works correctly for square matrices where the subblock A->A is the
663    diagonal block
664 */
665 #undef __FUNCT__
666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
668 {
669   int        ierr;
670   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
671 
672   PetscFunctionBegin;
673   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
674   if (a->rstart != a->cstart || a->rend != a->cend) {
675     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
676   }
677   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatScale_MPIAIJ"
683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
684 {
685   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
686   int        ierr;
687 
688   PetscFunctionBegin;
689   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
690   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatDestroy_MPIAIJ"
696 int MatDestroy_MPIAIJ(Mat mat)
697 {
698   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
699   int        ierr;
700 
701   PetscFunctionBegin;
702 #if defined(PETSC_USE_LOG)
703   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
704 #endif
705   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
706   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
707   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
708   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
709 #if defined (PETSC_USE_CTABLE)
710   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
711 #else
712   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
713 #endif
714   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
715   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
716   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
717   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
718   ierr = PetscFree(aij);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
723 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
727 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
728 {
729   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
730   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
731   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
732   PetscTruth        isdraw,isascii,flg;
733   PetscViewer       sviewer;
734   PetscViewerFormat format;
735 
736   PetscFunctionBegin;
737   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
738   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
739   if (isascii) {
740     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
741     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
742       MatInfo info;
743       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
744       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
746       if (flg) {
747         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
748 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
749       } else {
750         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
751 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
752       }
753       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
754       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
755       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
756       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
757       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
758       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
759       PetscFunctionReturn(0);
760     } else if (format == PETSC_VIEWER_ASCII_INFO) {
761       PetscFunctionReturn(0);
762     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
763 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
764       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
765 #endif
766 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
767       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
768 #endif
769 
770       PetscFunctionReturn(0);
771     }
772   } else if (isdraw) {
773     PetscDraw       draw;
774     PetscTruth isnull;
775     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
776     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
777   }
778 
779   if (size == 1) {
780     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
781     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
782   } else {
783     /* assemble the entire matrix onto first processor. */
784     Mat         A;
785     Mat_SeqAIJ *Aloc;
786     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
787     PetscScalar *a;
788 
789     if (!rank) {
790       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
791     } else {
792       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
793     }
794     PetscLogObjectParent(mat,A);
795 
796     /* copy over the A part */
797     Aloc = (Mat_SeqAIJ*)aij->A->data;
798     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
799     row = aij->rstart;
800     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
801     for (i=0; i<m; i++) {
802       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
803       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
804     }
805     aj = Aloc->j;
806     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
807 
808     /* copy over the B part */
809     Aloc = (Mat_SeqAIJ*)aij->B->data;
810     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
811     row  = aij->rstart;
812     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
813     ct   = cols;
814     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
815     for (i=0; i<m; i++) {
816       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
817       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
818     }
819     ierr = PetscFree(ct);CHKERRQ(ierr);
820     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
821     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     /*
823        Everyone has to call to draw the matrix since the graphics waits are
824        synchronized across all processors that share the PetscDraw object
825     */
826     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
827     if (!rank) {
828       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
829       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
830     }
831     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
832     ierr = MatDestroy(A);CHKERRQ(ierr);
833   }
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatView_MPIAIJ"
839 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
840 {
841   int        ierr;
842   PetscTruth isascii,isdraw,issocket,isbinary;
843 
844   PetscFunctionBegin;
845   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
846   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
847   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
848   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
849   if (isascii || isdraw || isbinary || issocket) {
850     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
851   } else {
852     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
853   }
854   PetscFunctionReturn(0);
855 }
856 
857 
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatRelax_MPIAIJ"
861 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
862 {
863   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
864   int          ierr;
865   Vec          bb1;
866   PetscScalar  mone=-1.0;
867 
868   PetscFunctionBegin;
869   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
870 
871   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
872 
873   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
874     if (flag & SOR_ZERO_INITIAL_GUESS) {
875       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
876       its--;
877     }
878 
879     while (its--) {
880       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
882 
883       /* update rhs: bb1 = bb - B*x */
884       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
885       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
886 
887       /* local sweep */
888       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
889       CHKERRQ(ierr);
890     }
891   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
892     if (flag & SOR_ZERO_INITIAL_GUESS) {
893       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
894       its--;
895     }
896     while (its--) {
897       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
898       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
899 
900       /* update rhs: bb1 = bb - B*x */
901       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
902       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
903 
904       /* local sweep */
905       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
906       CHKERRQ(ierr);
907     }
908   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
909     if (flag & SOR_ZERO_INITIAL_GUESS) {
910       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
911       its--;
912     }
913     while (its--) {
914       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
915       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
916 
917       /* update rhs: bb1 = bb - B*x */
918       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
919       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
920 
921       /* local sweep */
922       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
923       CHKERRQ(ierr);
924     }
925   } else {
926     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
927   }
928 
929   ierr = VecDestroy(bb1);CHKERRQ(ierr);
930   PetscFunctionReturn(0);
931 }
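
/*
   Note on the routine above: each SOR_LOCAL_* branch performs a block-Jacobi style
   iteration.  Ghost values of the current iterate are gathered into mat->lvec, the
   right-hand side is updated as
       bb1 = bb - B_o * x_ghost
   (implemented by negating lvec and then calling the multadd of the off-diagonal block),
   and the local diagonal block A_d is relaxed with the requested sweep, using the local
   iteration count lits for the inner solves.
*/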
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatGetInfo_MPIAIJ"
935 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
936 {
937   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
938   Mat        A = mat->A,B = mat->B;
939   int        ierr;
940   PetscReal  isend[5],irecv[5];
941 
942   PetscFunctionBegin;
943   info->block_size     = 1.0;
944   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
945   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
946   isend[3] = info->memory;  isend[4] = info->mallocs;
947   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
948   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
949   isend[3] += info->memory;  isend[4] += info->mallocs;
950   if (flag == MAT_LOCAL) {
951     info->nz_used      = isend[0];
952     info->nz_allocated = isend[1];
953     info->nz_unneeded  = isend[2];
954     info->memory       = isend[3];
955     info->mallocs      = isend[4];
956   } else if (flag == MAT_GLOBAL_MAX) {
957     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
958     info->nz_used      = irecv[0];
959     info->nz_allocated = irecv[1];
960     info->nz_unneeded  = irecv[2];
961     info->memory       = irecv[3];
962     info->mallocs      = irecv[4];
963   } else if (flag == MAT_GLOBAL_SUM) {
964     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
965     info->nz_used      = irecv[0];
966     info->nz_allocated = irecv[1];
967     info->nz_unneeded  = irecv[2];
968     info->memory       = irecv[3];
969     info->mallocs      = irecv[4];
970   }
971   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
972   info->fill_ratio_needed = 0;
973   info->factor_mallocs    = 0;
974   info->rows_global       = (double)matin->M;
975   info->columns_global    = (double)matin->N;
976   info->rows_local        = (double)matin->m;
977   info->columns_local     = (double)matin->N;
978 
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatSetOption_MPIAIJ"
984 int MatSetOption_MPIAIJ(Mat A,MatOption op)
985 {
986   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
987   int        ierr;
988 
989   PetscFunctionBegin;
990   switch (op) {
991   case MAT_NO_NEW_NONZERO_LOCATIONS:
992   case MAT_YES_NEW_NONZERO_LOCATIONS:
993   case MAT_COLUMNS_UNSORTED:
994   case MAT_COLUMNS_SORTED:
995   case MAT_NEW_NONZERO_ALLOCATION_ERR:
996   case MAT_KEEP_ZEROED_ROWS:
997   case MAT_NEW_NONZERO_LOCATION_ERR:
998   case MAT_USE_INODES:
999   case MAT_DO_NOT_USE_INODES:
1000   case MAT_IGNORE_ZERO_ENTRIES:
1001     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1002     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1003     break;
1004   case MAT_ROW_ORIENTED:
1005     a->roworiented = PETSC_TRUE;
1006     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1007     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1008     break;
1009   case MAT_ROWS_SORTED:
1010   case MAT_ROWS_UNSORTED:
1011   case MAT_YES_NEW_DIAGONALS:
1012   case MAT_USE_SINGLE_PRECISION_SOLVES:
1013     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1014     break;
1015   case MAT_COLUMN_ORIENTED:
1016     a->roworiented = PETSC_FALSE;
1017     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1018     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1019     break;
1020   case MAT_IGNORE_OFF_PROC_ENTRIES:
1021     a->donotstash = PETSC_TRUE;
1022     break;
1023   case MAT_NO_NEW_DIAGONALS:
1024     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1025   default:
1026     SETERRQ(PETSC_ERR_SUP,"unknown option");
1027   }
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatGetRow_MPIAIJ"
1033 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1034 {
1035   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1036   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1037   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1038   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1039   int          *cmap,*idx_p;
1040 
1041   PetscFunctionBegin;
1042   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1043   mat->getrowactive = PETSC_TRUE;
1044 
1045   if (!mat->rowvalues && (idx || v)) {
1046     /*
1047         allocate enough space to hold information from the longest row.
1048     */
1049     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1050     int     max = 1,tmp;
1051     for (i=0; i<matin->m; i++) {
1052       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1053       if (max < tmp) { max = tmp; }
1054     }
1055     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1056     mat->rowindices = (int*)(mat->rowvalues + max);
1057   }
1058 
1059   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1060   lrow = row - rstart;
1061 
1062   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1063   if (!v)   {pvA = 0; pvB = 0;}
1064   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1065   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1066   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1067   nztot = nzA + nzB;
1068 
1069   cmap  = mat->garray;
1070   if (v  || idx) {
1071     if (nztot) {
1072       /* Sort by increasing column numbers, assuming A and B already sorted */
1073       int imark = -1;
1074       if (v) {
1075         *v = v_p = mat->rowvalues;
1076         for (i=0; i<nzB; i++) {
1077           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1078           else break;
1079         }
1080         imark = i;
1081         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1082         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1083       }
1084       if (idx) {
1085         *idx = idx_p = mat->rowindices;
1086         if (imark > -1) {
1087           for (i=0; i<imark; i++) {
1088             idx_p[i] = cmap[cworkB[i]];
1089           }
1090         } else {
1091           for (i=0; i<nzB; i++) {
1092             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1093             else break;
1094           }
1095           imark = i;
1096         }
1097         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1098         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1099       }
1100     } else {
1101       if (idx) *idx = 0;
1102       if (v)   *v   = 0;
1103     }
1104   }
1105   *nz = nztot;
1106   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1107   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
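
/*
   Usage sketch for the routine above, through the public interface: column indices are
   returned in global numbering, merged and sorted across the diagonal and off-diagonal
   blocks.  The matrix A below is only illustrative.
*/
#if 0
  int          row,ncols,*cols,rstart,rend,ierr;
  PetscScalar  *vals;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... examine ncols, cols[], vals[] ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
#endif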
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1113 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1114 {
1115   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1116 
1117   PetscFunctionBegin;
1118   if (aij->getrowactive == PETSC_FALSE) {
1119     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1120   }
1121   aij->getrowactive = PETSC_FALSE;
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatNorm_MPIAIJ"
1127 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1128 {
1129   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1130   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1131   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1132   PetscReal    sum = 0.0;
1133   PetscScalar  *v;
1134 
1135   PetscFunctionBegin;
1136   if (aij->size == 1) {
1137     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1138   } else {
1139     if (type == NORM_FROBENIUS) {
1140       v = amat->a;
1141       for (i=0; i<amat->nz; i++) {
1142 #if defined(PETSC_USE_COMPLEX)
1143         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1144 #else
1145         sum += (*v)*(*v); v++;
1146 #endif
1147       }
1148       v = bmat->a;
1149       for (i=0; i<bmat->nz; i++) {
1150 #if defined(PETSC_USE_COMPLEX)
1151         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1152 #else
1153         sum += (*v)*(*v); v++;
1154 #endif
1155       }
1156       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1157       *norm = sqrt(*norm);
1158     } else if (type == NORM_1) { /* max column norm */
1159       PetscReal *tmp,*tmp2;
1160       int    *jj,*garray = aij->garray;
1161       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1162       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1163       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1164       *norm = 0.0;
1165       v = amat->a; jj = amat->j;
1166       for (j=0; j<amat->nz; j++) {
1167         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1168       }
1169       v = bmat->a; jj = bmat->j;
1170       for (j=0; j<bmat->nz; j++) {
1171         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1172       }
1173       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1174       for (j=0; j<mat->N; j++) {
1175         if (tmp2[j] > *norm) *norm = tmp2[j];
1176       }
1177       ierr = PetscFree(tmp);CHKERRQ(ierr);
1178       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1179     } else if (type == NORM_INFINITY) { /* max row norm */
1180       PetscReal ntemp = 0.0;
1181       for (j=0; j<aij->A->m; j++) {
1182         v = amat->a + amat->i[j] + shift;
1183         sum = 0.0;
1184         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1185           sum += PetscAbsScalar(*v); v++;
1186         }
1187         v = bmat->a + bmat->i[j] + shift;
1188         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1189           sum += PetscAbsScalar(*v); v++;
1190         }
1191         if (sum > ntemp) ntemp = sum;
1192       }
1193       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1194     } else {
1195       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1196     }
1197   }
1198   PetscFunctionReturn(0);
1199 }
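
/*
   Note on the routine above: the Frobenius norm is formed by summing the squares of the
   local A and B entries and reducing with MPI_SUM; NORM_1 is the maximum column sum,
   accumulated into a length-N work array and reduced; NORM_INFINITY is the maximum row
   sum, reduced with MPI_MAX.  The true 2-norm is not supported.
*/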
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatTranspose_MPIAIJ"
1203 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1204 {
1205   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1206   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1207   int          ierr,shift = Aloc->indexshift;
1208   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1209   Mat          B;
1210   PetscScalar  *array;
1211 
1212   PetscFunctionBegin;
1213   if (!matout && M != N) {
1214     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1215   }
1216 
1217   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1218 
1219   /* copy over the A part */
1220   Aloc = (Mat_SeqAIJ*)a->A->data;
1221   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1222   row = a->rstart;
1223   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1224   for (i=0; i<m; i++) {
1225     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1226     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1227   }
1228   aj = Aloc->j;
1229   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1230 
1231   /* copy over the B part */
1232   Aloc = (Mat_SeqAIJ*)a->B->data;
1233   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1234   row  = a->rstart;
1235   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1236   ct   = cols;
1237   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1238   for (i=0; i<m; i++) {
1239     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1240     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1241   }
1242   ierr = PetscFree(ct);CHKERRQ(ierr);
1243   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1244   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1245   if (matout) {
1246     *matout = B;
1247   } else {
1248     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1249   }
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1255 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1256 {
1257   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1258   Mat        a = aij->A,b = aij->B;
1259   int        ierr,s1,s2,s3;
1260 
1261   PetscFunctionBegin;
1262   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1263   if (rr) {
1264     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1265     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1266     /* Overlap communication with computation. */
1267     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1268   }
1269   if (ll) {
1270     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1271     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1272     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1273   }
1274   /* scale  the diagonal block */
1275   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1276 
1277   if (rr) {
1278     /* Do a scatter end and then right scale the off-diagonal block */
1279     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1280     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1281   }
1282 
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1289 int MatPrintHelp_MPIAIJ(Mat A)
1290 {
1291   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1292   int        ierr;
1293 
1294   PetscFunctionBegin;
1295   if (!a->rank) {
1296     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1303 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1304 {
1305   PetscFunctionBegin;
1306   *bs = 1;
1307   PetscFunctionReturn(0);
1308 }
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1311 int MatSetUnfactored_MPIAIJ(Mat A)
1312 {
1313   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1314   int        ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatEqual_MPIAIJ"
1323 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1324 {
1325   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1326   Mat        a,b,c,d;
1327   PetscTruth flg;
1328   int        ierr;
1329 
1330   PetscFunctionBegin;
1331   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1332   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1333   a = matA->A; b = matA->B;
1334   c = matB->A; d = matB->B;
1335 
1336   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1337   if (flg == PETSC_TRUE) {
1338     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1339   }
1340   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCopy_MPIAIJ"
1346 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1347 {
1348   int        ierr;
1349   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1350   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1351   PetscTruth flg;
1352 
1353   PetscFunctionBegin;
1354   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1355   if (str != SAME_NONZERO_PATTERN || !flg) {
1356     /* because of the column compression in the off-processor part of the matrix a->B,
1357        the number of columns in a->B and b->B may be different, hence we cannot call
1358        MatCopy() directly on the two parts. If need be, we could provide a more
1359        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices and
1360        then copying the submatrices */
1361     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1362   } else {
1363     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1364     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1371 int MatSetUpPreallocation_MPIAIJ(Mat A)
1372 {
1373   int        ierr;
1374 
1375   PetscFunctionBegin;
1376   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1381 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1382 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1383 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1384 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1385 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1386 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1387 #endif
1388 
1389 #include "petscblaslapack.h"
1390 
1391 #undef __FUNCT__
1392 #define __FUNCT__ "MatAXPY_MPIAIJ"
1393 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1394 {
1395   int        ierr,one = 1;
1396   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1397   Mat_SeqAIJ *x,*y;
1398 
1399   PetscFunctionBegin;
1400   if (str == SAME_NONZERO_PATTERN) {
1401     x  = (Mat_SeqAIJ *)xx->A->data;
1402     y  = (Mat_SeqAIJ *)yy->A->data;
1403     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1404     x  = (Mat_SeqAIJ *)xx->B->data;
1405     y  = (Mat_SeqAIJ *)yy->B->data;
1406     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1407   } else {
1408     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 /* -------------------------------------------------------------------*/
1414 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1415        MatGetRow_MPIAIJ,
1416        MatRestoreRow_MPIAIJ,
1417        MatMult_MPIAIJ,
1418        MatMultAdd_MPIAIJ,
1419        MatMultTranspose_MPIAIJ,
1420        MatMultTransposeAdd_MPIAIJ,
1421        0,
1422        0,
1423        0,
1424        0,
1425        0,
1426        0,
1427        MatRelax_MPIAIJ,
1428        MatTranspose_MPIAIJ,
1429        MatGetInfo_MPIAIJ,
1430        MatEqual_MPIAIJ,
1431        MatGetDiagonal_MPIAIJ,
1432        MatDiagonalScale_MPIAIJ,
1433        MatNorm_MPIAIJ,
1434        MatAssemblyBegin_MPIAIJ,
1435        MatAssemblyEnd_MPIAIJ,
1436        0,
1437        MatSetOption_MPIAIJ,
1438        MatZeroEntries_MPIAIJ,
1439        MatZeroRows_MPIAIJ,
1440 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1441        MatLUFactorSymbolic_MPIAIJ_TFS,
1442 #else
1443        0,
1444 #endif
1445        0,
1446        0,
1447        0,
1448        MatSetUpPreallocation_MPIAIJ,
1449        0,
1450        0,
1451        0,
1452        0,
1453        MatDuplicate_MPIAIJ,
1454        0,
1455        0,
1456        0,
1457        0,
1458        MatAXPY_MPIAIJ,
1459        MatGetSubMatrices_MPIAIJ,
1460        MatIncreaseOverlap_MPIAIJ,
1461        MatGetValues_MPIAIJ,
1462        MatCopy_MPIAIJ,
1463        MatPrintHelp_MPIAIJ,
1464        MatScale_MPIAIJ,
1465        0,
1466        0,
1467        0,
1468        MatGetBlockSize_MPIAIJ,
1469        0,
1470        0,
1471        0,
1472        0,
1473        MatFDColoringCreate_MPIAIJ,
1474        0,
1475        MatSetUnfactored_MPIAIJ,
1476        0,
1477        0,
1478        MatGetSubMatrix_MPIAIJ,
1479        MatDestroy_MPIAIJ,
1480        MatView_MPIAIJ,
1481        MatGetPetscMaps_Petsc,
1482        0,
1483        0,
1484        0,
1485        0,
1486        0,
1487        0,
1488        0,
1489        0,
1490        MatSetColoring_MPIAIJ,
1491        MatSetValuesAdic_MPIAIJ,
1492        MatSetValuesAdifor_MPIAIJ
1493 };
1494 
1495 /* ----------------------------------------------------------------------------------------*/
1496 
1497 EXTERN_C_BEGIN
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1500 int MatStoreValues_MPIAIJ(Mat mat)
1501 {
1502   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1503   int        ierr;
1504 
1505   PetscFunctionBegin;
1506   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1507   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
1510 EXTERN_C_END
1511 
1512 EXTERN_C_BEGIN
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1515 int MatRetrieveValues_MPIAIJ(Mat mat)
1516 {
1517   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1518   int        ierr;
1519 
1520   PetscFunctionBegin;
1521   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1522   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1523   PetscFunctionReturn(0);
1524 }
1525 EXTERN_C_END
1526 
1527 #include "petscpc.h"
1528 EXTERN_C_BEGIN
1529 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1530 EXTERN_C_END
1531 
1532 EXTERN_C_BEGIN
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatCreate_MPIAIJ"
1535 int MatCreate_MPIAIJ(Mat B)
1536 {
1537   Mat_MPIAIJ *b;
1538   int        ierr,i,size;
1539 
1540   PetscFunctionBegin;
1541   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1542 
1543   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1544   B->data         = (void*)b;
1545   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1546   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1547   B->factor       = 0;
1548   B->assembled    = PETSC_FALSE;
1549   B->mapping      = 0;
1550 
1551   B->insertmode      = NOT_SET_VALUES;
1552   b->size            = size;
1553   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1554 
1555   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1556   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1557 
1558   /* the information in the maps duplicates the information computed below, eventually
1559      we should remove the duplicate information that is not contained in the maps */
1560   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1561   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1562 
1563   /* build local table of row and column ownerships */
1564   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1565   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1566   b->cowners = b->rowners + b->size + 2;
1567   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1568   b->rowners[0] = 0;
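  /* turn the gathered per-process row counts into a running sum so that
     rowners[p] is the global index of the first row owned by process p */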
1569   for (i=2; i<=b->size; i++) {
1570     b->rowners[i] += b->rowners[i-1];
1571   }
1572   b->rstart = b->rowners[b->rank];
1573   b->rend   = b->rowners[b->rank+1];
1574   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1575   b->cowners[0] = 0;
1576   for (i=2; i<=b->size; i++) {
1577     b->cowners[i] += b->cowners[i-1];
1578   }
1579   b->cstart = b->cowners[b->rank];
1580   b->cend   = b->cowners[b->rank+1];
1581 
1582   /* build cache for off array entries formed */
1583   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1584   b->donotstash  = PETSC_FALSE;
1585   b->colmap      = 0;
1586   b->garray      = 0;
1587   b->roworiented = PETSC_TRUE;
1588 
1589   /* stuff used for matrix vector multiply */
1590   b->lvec      = PETSC_NULL;
1591   b->Mvctx     = PETSC_NULL;
1592 
1593   /* stuff for MatGetRow() */
1594   b->rowindices   = 0;
1595   b->rowvalues    = 0;
1596   b->getrowactive = PETSC_FALSE;
1597 
1598   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1599                                      "MatStoreValues_MPIAIJ",
1600                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1601   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1602                                      "MatRetrieveValues_MPIAIJ",
1603                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1605                                      "MatGetDiagonalBlock_MPIAIJ",
1606                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1607 
1608   PetscFunctionReturn(0);
1609 }
1610 EXTERN_C_END
1611 
1612 #undef __FUNCT__
1613 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1614 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1615 {
1616   Mat        mat;
1617   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1618   int        ierr;
1619 
1620   PetscFunctionBegin;
1621   *newmat       = 0;
1622   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1623   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1624   a    = (Mat_MPIAIJ*)mat->data;
1625   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1626   mat->factor       = matin->factor;
1627   mat->assembled    = PETSC_TRUE;
1628   mat->insertmode   = NOT_SET_VALUES;
1629   mat->preallocated = PETSC_TRUE;
1630 
1631   a->rstart       = oldmat->rstart;
1632   a->rend         = oldmat->rend;
1633   a->cstart       = oldmat->cstart;
1634   a->cend         = oldmat->cend;
1635   a->size         = oldmat->size;
1636   a->rank         = oldmat->rank;
1637   a->donotstash   = oldmat->donotstash;
1638   a->roworiented  = oldmat->roworiented;
1639   a->rowindices   = 0;
1640   a->rowvalues    = 0;
1641   a->getrowactive = PETSC_FALSE;
1642 
1643   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1644   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1645   if (oldmat->colmap) {
1646 #if defined (PETSC_USE_CTABLE)
1647     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1648 #else
1649     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1650     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1651     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1652 #endif
1653   } else a->colmap = 0;
1654   if (oldmat->garray) {
1655     int len;
1656     len  = oldmat->B->n;
1657     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1658     PetscLogObjectMemory(mat,len*sizeof(int));
1659     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1660   } else a->garray = 0;
1661 
1662   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1663   PetscLogObjectParent(mat,a->lvec);
1664   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1665   PetscLogObjectParent(mat,a->Mvctx);
1666   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1667   PetscLogObjectParent(mat,a->A);
1668   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1669   PetscLogObjectParent(mat,a->B);
1670   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1671   *newmat = mat;
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #include "petscsys.h"
1676 
1677 EXTERN_C_BEGIN
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatLoad_MPIAIJ"
1680 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1681 {
1682   Mat          A;
1683   PetscScalar  *vals,*svals;
1684   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1685   MPI_Status   status;
1686   int          i,nz,ierr,j,rstart,rend,fd;
1687   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1688   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1689   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1690 
1691   PetscFunctionBegin;
1692   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1693   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1694   if (!rank) {
1695     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1696     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1697     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1698     if (header[3] < 0) {
1699       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1700     }
1701   }
1702 
1703   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1704   M = header[1]; N = header[2];
1705   /* determine ownership of all rows */
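  /* each process gets M/size rows; the first M%size processes get one extra row */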
1706   m = M/size + ((M % size) > rank);
1707   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1708   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1709   rowners[0] = 0;
1710   for (i=2; i<=size; i++) {
1711     rowners[i] += rowners[i-1];
1712   }
1713   rstart = rowners[rank];
1714   rend   = rowners[rank+1];
1715 
1716   /* distribute row lengths to all processors */
1717   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1718   offlens = ourlens + (rend-rstart);
1719   if (!rank) {
1720     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1721     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1722     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1723     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1724     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1725     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1726   } else {
1727     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1728   }
1729 
1730   if (!rank) {
1731     /* calculate the number of nonzeros on each processor */
1732     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1733     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1734     for (i=0; i<size; i++) {
1735       for (j=rowners[i]; j< rowners[i+1]; j++) {
1736         procsnz[i] += rowlengths[j];
1737       }
1738     }
1739     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1740 
1741     /* determine max buffer needed and allocate it */
1742     maxnz = 0;
1743     for (i=0; i<size; i++) {
1744       maxnz = PetscMax(maxnz,procsnz[i]);
1745     }
1746     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1747 
1748     /* read in my part of the matrix column indices  */
1749     nz   = procsnz[0];
1750     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1751     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1752 
1753     /* read in everyone else's part and ship it off */
1754     for (i=1; i<size; i++) {
1755       nz   = procsnz[i];
1756       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1757       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1758     }
1759     ierr = PetscFree(cols);CHKERRQ(ierr);
1760   } else {
1761     /* determine buffer space needed for message */
1762     nz = 0;
1763     for (i=0; i<m; i++) {
1764       nz += ourlens[i];
1765     }
1766     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1767 
1768     /* receive message of column indices */
1769     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1770     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1771     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1772   }
1773 
1774   /* determine column ownership if matrix is not square */
1775   if (N != M) {
1776     n      = N/size + ((N % size) > rank);
1777     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1778     cstart = cend - n;
1779   } else {
1780     cstart = rstart;
1781     cend   = rend;
1782     n      = cend - cstart;
1783   }
1784 
1785   /* loop over local rows, determining number of off diagonal entries */
1786   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1787   jj = 0;
1788   for (i=0; i<m; i++) {
1789     for (j=0; j<ourlens[i]; j++) {
1790       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1791       jj++;
1792     }
1793   }
1794 
1795   /* create our matrix */
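  /* ourlens[] holds the total number of nonzeros in each local row; temporarily
     subtract the off-diagonal counts so that MatCreateMPIAIJ() receives the
     DIAGONAL (ourlens) and OFF-DIAGONAL (offlens) preallocation, then restore
     the totals for the MatSetValues() loops below */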
1796   for (i=0; i<m; i++) {
1797     ourlens[i] -= offlens[i];
1798   }
1799   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1800   A = *newmat;
1801   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1802   for (i=0; i<m; i++) {
1803     ourlens[i] += offlens[i];
1804   }
1805 
1806   if (!rank) {
1807     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1808 
1809     /* read in my part of the matrix numerical values  */
1810     nz   = procsnz[0];
1811     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1812 
1813     /* insert into matrix */
1814     jj      = rstart;
1815     smycols = mycols;
1816     svals   = vals;
1817     for (i=0; i<m; i++) {
1818       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1819       smycols += ourlens[i];
1820       svals   += ourlens[i];
1821       jj++;
1822     }
1823 
1824     /* read in other processors and ship out */
1825     for (i=1; i<size; i++) {
1826       nz   = procsnz[i];
1827       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1828       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1829     }
1830     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1831   } else {
1832     /* receive numeric values */
1833     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1834 
1835     /* receive message of values */
1836     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1837     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1838     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1839 
1840     /* insert into matrix */
1841     jj      = rstart;
1842     smycols = mycols;
1843     svals   = vals;
1844     for (i=0; i<m; i++) {
1845       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1846       smycols += ourlens[i];
1847       svals   += ourlens[i];
1848       jj++;
1849     }
1850   }
1851   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1852   ierr = PetscFree(vals);CHKERRQ(ierr);
1853   ierr = PetscFree(mycols);CHKERRQ(ierr);
1854   ierr = PetscFree(rowners);CHKERRQ(ierr);
1855 
1856   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1857   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 EXTERN_C_END
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1864 /*
1865     Not great since it makes two copies of the submatrix: first a SeqAIJ
1866   copy of the local rows, and then the end result by concatenating the local
1867   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1868 */
1869 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1870 {
1871   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1872   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1873   Mat          *local,M,Mreuse;
1874   PetscScalar  *vwork,*aa;
1875   MPI_Comm     comm = mat->comm;
1876   Mat_SeqAIJ   *aij;
1877 
1878 
1879   PetscFunctionBegin;
1880   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1881   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1882 
1883   if (call ==  MAT_REUSE_MATRIX) {
1884     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1885     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1886     local = &Mreuse;
1887     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1888   } else {
1889     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1890     Mreuse = *local;
1891     ierr   = PetscFree(local);CHKERRQ(ierr);
1892   }
1893 
1894   /*
1895       m - number of local rows
1896       n - number of columns (same on all processors)
1897       rstart - first row in new global matrix generated
1898   */
1899   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1900   if (call == MAT_INITIAL_MATRIX) {
1901     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1902     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1903     ii  = aij->i;
1904     jj  = aij->j;
1905 
1906     /*
1907         Determine the number of non-zeros in the diagonal and off-diagonal
1908         portions of the matrix in order to do correct preallocation
1909     */
1910 
1911     /* first get start and end of "diagonal" columns */
1912     if (csize == PETSC_DECIDE) {
1913       nlocal = n/size + ((n % size) > rank);
1914     } else {
1915       nlocal = csize;
1916     }
1917     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1918     rstart = rend - nlocal;
1919     if (rank == size - 1 && rend != n) {
1920       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1921     }
1922 
1923     /* next, compute all the lengths */
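    /* dlens[i] counts the columns of row i that fall in the new diagonal block
       [rstart,rend); olens[i] counts the remaining (off-diagonal) columns; both
       are used to preallocate the new parallel matrix below */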
1924     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1925     olens = dlens + m;
1926     for (i=0; i<m; i++) {
1927       jend = ii[i+1] - ii[i];
1928       olen = 0;
1929       dlen = 0;
1930       for (j=0; j<jend; j++) {
1931         if (*jj < rstart || *jj >= rend) olen++;
1932         else dlen++;
1933         jj++;
1934       }
1935       olens[i] = olen;
1936       dlens[i] = dlen;
1937     }
1938     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1939     ierr = PetscFree(dlens);CHKERRQ(ierr);
1940   } else {
1941     int ml,nl;
1942 
1943     M = *newmat;
1944     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1945     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1946     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1947     /*
1948          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1949        rather than the slower MatSetValues().
1950     */
1951     M->was_assembled = PETSC_TRUE;
1952     M->assembled     = PETSC_FALSE;
1953   }
1954   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1955   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1956   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1957   ii  = aij->i;
1958   jj  = aij->j;
1959   aa  = aij->a;
1960   for (i=0; i<m; i++) {
1961     row   = rstart + i;
1962     nz    = ii[i+1] - ii[i];
1963     cwork = jj;     jj += nz;
1964     vwork = aa;     aa += nz;
1965     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1966   }
1967 
1968   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1969   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1970   *newmat = M;
1971 
1972   /* save submatrix used in processor for next request */
1973   if (call ==  MAT_INITIAL_MATRIX) {
1974     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1975     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1976   }
1977 
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1983 /*@C
1984    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1985    (the default parallel PETSc format).  For good matrix assembly performance
1986    the user should preallocate the matrix storage by setting the parameters
1987    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1988    performance can be increased by more than a factor of 50.
1989 
1990    Collective on MPI_Comm
1991 
1992    Input Parameters:
1993 +  A - the matrix
1994 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1995            (same value is used for all local rows)
1996 .  d_nnz - array containing the number of nonzeros in the various rows of the
1997            DIAGONAL portion of the local submatrix (possibly different for each row)
1998            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1999            The size of this array is equal to the number of local rows, i.e., 'm'.
2000            You must leave room for the diagonal entry even if it is zero.
2001 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2002            submatrix (same value is used for all local rows).
2003 -  o_nnz - array containing the number of nonzeros in the various rows of the
2004            OFF-DIAGONAL portion of the local submatrix (possibly different for
2005            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2006            structure. The size of this array is equal to the number
2007            of local rows, i.e., 'm'.
2008 
2009    The AIJ format (also called the Yale sparse matrix format or
2010    compressed row storage) is fully compatible with standard Fortran 77
2011    storage.  That is, the stored row and column indices can begin at
2012    either one (as in Fortran) or zero.  See the users manual for details.
2013 
2014    The user MUST specify either the local or global matrix dimensions
2015    (possibly both).
2016 
2017    The parallel matrix is partitioned such that the first m0 rows belong to
2018    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2019    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2020 
2021    The DIAGONAL portion of the local submatrix of a processor can be defined
2022    as the submatrix which is obtained by extracting the part corresponding
2023    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2024    first row that belongs to the processor, and r2 is the last row belonging
2025    to this processor. This is a square mxm matrix. The remaining portion
2026    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2027 
2028    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2029 
2030    By default, this format uses inodes (identical nodes) when possible.
2031    We search for consecutive rows with the same nonzero structure, thereby
2032    reusing matrix information to achieve increased efficiency.
2033 
2034    Options Database Keys:
2035 +  -mat_aij_no_inode  - Do not use inodes
2036 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2037 -  -mat_aij_oneindex - Internally use indexing starting at 1
2038         rather than 0.  Note that when calling MatSetValues(),
2039         the user still MUST index entries starting at 0!
2040 
2041    Example usage:
2042 
2043    Consider the following 8x8 matrix with 34 non-zero values, that is
2044    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2045    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2046    as follows:
2047 
2048 .vb
2049             1  2  0  |  0  3  0  |  0  4
2050     Proc0   0  5  6  |  7  0  0  |  8  0
2051             9  0 10  | 11  0  0  | 12  0
2052     -------------------------------------
2053            13  0 14  | 15 16 17  |  0  0
2054     Proc1   0 18  0  | 19 20 21  |  0  0
2055             0  0  0  | 22 23  0  | 24  0
2056     -------------------------------------
2057     Proc2  25 26 27  |  0  0 28  | 29  0
2058            30  0  0  | 31 32 33  |  0 34
2059 .ve
2060 
2061    This can be represented as a collection of submatrices as:
2062 
2063 .vb
2064       A B C
2065       D E F
2066       G H I
2067 .ve
2068 
2069    Where the submatrices A,B,C are owned by proc0, D,E,F are
2070    owned by proc1, G,H,I are owned by proc2.
2071 
2072    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2073    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2074    The 'M','N' parameters are 8,8, and have the same values on all procs.
2075 
2076    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2077    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2078    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2079    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2080    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2081    matrix, and [DF] as another SeqAIJ matrix.
2082 
2083    When d_nz, o_nz parameters are specified, d_nz storage elements are
2084    allocated for every row of the local diagonal submatrix, and o_nz
2085    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2086    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2087    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2088    In this case, the values of d_nz,o_nz are:
2089 .vb
2090      proc0 : d_nz = 2, o_nz = 2
2091      proc1 : d_nz = 3, o_nz = 2
2092      proc2 : d_nz = 1, o_nz = 4
2093 .ve
2094    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2095    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2096    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2097    34 values.
2098 
2099    When d_nnz, o_nnz parameters are specified, the storage is specified
2100    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2101    In the above case the values for d_nnz,o_nnz are:
2102 .vb
2103      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2104      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2105      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2106 .ve
2107    Here the space allocated is the sum of all the above values, i.e., 34, and
2108    hence pre-allocation is perfect.
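
   A minimal calling sketch using the proc0 values above (assuming an int ierr
   and an MPI communicator comm spanning the 3 processes; each process would
   pass its own local sizes and its own d_nnz/o_nnz arrays):

.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};   /* proc0 values from above */

     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Since d_nnz and o_nnz are given, the d_nz and o_nz arguments (here 0) are ignored.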
2109 
2110    Level: intermediate
2111 
2112 .keywords: matrix, aij, compressed row, sparse, parallel
2113 
2114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2115 @*/
2116 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2117 {
2118   Mat_MPIAIJ   *b;
2119   int          ierr,i;
2120   PetscTruth   flg2;
2121 
2122   PetscFunctionBegin;
2123   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2124   if (!flg2) PetscFunctionReturn(0);
2125   B->preallocated = PETSC_TRUE;
2126   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2127   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2128   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2129   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2130   if (d_nnz) {
2131     for (i=0; i<B->m; i++) {
2132       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2133     }
2134   }
2135   if (o_nnz) {
2136     for (i=0; i<B->m; i++) {
2137       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2138     }
2139   }
2140   b = (Mat_MPIAIJ*)B->data;
2141 
2142   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2143   PetscLogObjectParent(B,b->A);
2144   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2145   PetscLogObjectParent(B,b->B);
2146 
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatCreateMPIAIJ"
2152 /*@C
2153    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2154    (the default parallel PETSc format).  For good matrix assembly performance
2155    the user should preallocate the matrix storage by setting the parameters
2156    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2157    performance can be increased by more than a factor of 50.
2158 
2159    Collective on MPI_Comm
2160 
2161    Input Parameters:
2162 +  comm - MPI communicator
2163 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2164            This value should be the same as the local size used in creating the
2165            y vector for the matrix-vector product y = Ax.
2166 .  n - This value should be the same as the local size used in creating the
2167        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2168        calculated if N is given) For square matrices n is almost always m.
2169 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2170 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2171 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2172            (same value is used for all local rows)
2173 .  d_nnz - array containing the number of nonzeros in the various rows of the
2174            DIAGONAL portion of the local submatrix (possibly different for each row)
2175            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2176            The size of this array is equal to the number of local rows, i.e., 'm'.
2177            You must leave room for the diagonal entry even if it is zero.
2178 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2179            submatrix (same value is used for all local rows).
2180 -  o_nnz - array containing the number of nonzeros in the various rows of the
2181            OFF-DIAGONAL portion of the local submatrix (possibly different for
2182            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2183            structure. The size of this array is equal to the number
2184            of local rows, i.e., 'm'.
2185 
2186    Output Parameter:
2187 .  A - the matrix
2188 
2189    Notes:
2190    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2191    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2192    storage requirements for this matrix.
2193 
2194    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2195    processor then it must be used on all processors that share the object for
2196    that argument.
2197 
2198    The AIJ format (also called the Yale sparse matrix format or
2199    compressed row storage) is fully compatible with standard Fortran 77
2200    storage.  That is, the stored row and column indices can begin at
2201    either one (as in Fortran) or zero.  See the users manual for details.
2202 
2203    The user MUST specify either the local or global matrix dimensions
2204    (possibly both).
2205 
2206    The parallel matrix is partitioned such that the first m0 rows belong to
2207    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2208    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2209 
2210    The DIAGONAL portion of the local submatrix of a processor can be defined
2211    as the submatrix which is obtained by extracting the part corresponding
2212    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2213    first row that belongs to the processor, and r2 is the last row belonging
2214    to this processor. This is a square mxm matrix. The remaining portion
2215    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2216 
2217    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2218 
2219    By default, this format uses inodes (identical nodes) when possible.
2220    We search for consecutive rows with the same nonzero structure, thereby
2221    reusing matrix information to achieve increased efficiency.
2222 
2223    Options Database Keys:
2224 +  -mat_aij_no_inode  - Do not use inodes
2225 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2226 -  -mat_aij_oneindex - Internally use indexing starting at 1
2227         rather than 0.  Note that when calling MatSetValues(),
2228         the user still MUST index entries starting at 0!
2229 
2230 
2231    Example usage:
2232 
2233    Consider the following 8x8 matrix with 34 non-zero values, that is
2234    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2235    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2236    as follows:
2237 
2238 .vb
2239             1  2  0  |  0  3  0  |  0  4
2240     Proc0   0  5  6  |  7  0  0  |  8  0
2241             9  0 10  | 11  0  0  | 12  0
2242     -------------------------------------
2243            13  0 14  | 15 16 17  |  0  0
2244     Proc1   0 18  0  | 19 20 21  |  0  0
2245             0  0  0  | 22 23  0  | 24  0
2246     -------------------------------------
2247     Proc2  25 26 27  |  0  0 28  | 29  0
2248            30  0  0  | 31 32 33  |  0 34
2249 .ve
2250 
2251    This can be represented as a collection of submatrices as:
2252 
2253 .vb
2254       A B C
2255       D E F
2256       G H I
2257 .ve
2258 
2259    Where the submatrices A,B,C are owned by proc0, D,E,F are
2260    owned by proc1, G,H,I are owned by proc2.
2261 
2262    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2263    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2264    The 'M','N' parameters are 8,8, and have the same values on all procs.
2265 
2266    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2267    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2268    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2269    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2270    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2271    matrix, and [DF] as another SeqAIJ matrix.
2272 
2273    When d_nz, o_nz parameters are specified, d_nz storage elements are
2274    allocated for every row of the local diagonal submatrix, and o_nz
2275    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2276    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2277    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2278    In this case, the values of d_nz,o_nz are:
2279 .vb
2280      proc0 : d_nz = 2, o_nz = 2
2281      proc1 : d_nz = 3, o_nz = 2
2282      proc2 : d_nz = 1, o_nz = 4
2283 .ve
2284    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2285    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2286    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2287    34 values.
2288 
2289    When d_nnz, o_nnz parameters are specified, the storage is specified
2290    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2291    In the above case the values for d_nnz,o_nnz are:
2292 .vb
2293      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2294      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2295      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2296 .ve
2297    Here the space allocated is the sum of all the above values, i.e., 34, and
2298    hence pre-allocation is perfect.
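
   A minimal calling sketch for proc0 of the example above (assuming an int ierr
   and an MPI communicator comm spanning the 3 processes; each process would pass
   its own m, n, d_nnz and o_nnz):

.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};   /* proc0 values from above */

     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Since d_nnz and o_nnz are given, the d_nz and o_nz arguments (here 0) are ignored.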
2299 
2300    Level: intermediate
2301 
2302 .keywords: matrix, aij, compressed row, sparse, parallel
2303 
2304 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2305 @*/
2306 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2307 {
2308   int ierr,size;
2309 
2310   PetscFunctionBegin;
2311   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2312   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
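  /* on a single process the matrix is stored as a plain SeqAIJ matrix and the
     off-diagonal preallocation arguments o_nz/o_nnz are not used */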
2313   if (size > 1) {
2314     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2315     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2316   } else {
2317     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2318     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2319   }
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
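/*
    MatMPIAIJGetSeqAIJ - gives access to the local diagonal block (Ad), the local
  off-diagonal block (Ao), and the array colmap (= garray) that maps the compressed
  column numbers of Ao back to global column numbers.
*/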
2325 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2326 {
2327   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2328   PetscFunctionBegin;
2329   *Ad     = a->A;
2330   *Ao     = a->B;
2331   *colmap = a->garray;
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2337 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2338 {
2339   int        ierr;
2340   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2341 
2342   PetscFunctionBegin;
2343   if (coloring->ctype == IS_COLORING_LOCAL) {
2344     int        *allcolors,*colors,i;
2345     ISColoring ocoloring;
2346 
2347     /* set coloring for diagonal portion */
2348     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2349 
2350     /* set coloring for off-diagonal portion */
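    /* the off-diagonal block B uses compressed (local) column numbers; garray[]
       maps them back to global columns, so gather the colors of all global
       columns and pick out the ones referenced by garray[] */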
2351     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2352     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2353     for (i=0; i<a->B->n; i++) {
2354       colors[i] = allcolors[a->garray[i]];
2355     }
2356     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2357     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2358     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2359     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2360   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2361     int        *colors,i,*larray;
2362     ISColoring ocoloring;
2363 
2364     /* set coloring for diagonal portion */
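    /* a ghosted coloring is indexed by the local numbering of A->mapping, so
       convert the global column numbers of the diagonal block through the
       mapping before looking up their colors */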
2365     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2366     for (i=0; i<a->A->n; i++) {
2367       larray[i] = i + a->cstart;
2368     }
2369     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2370     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2371     for (i=0; i<a->A->n; i++) {
2372       colors[i] = coloring->colors[larray[i]];
2373     }
2374     ierr = PetscFree(larray);CHKERRQ(ierr);
2375     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2376     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2377     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2378 
2379     /* set coloring for off-diagonal portion */
2380     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2381     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2382     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2383     for (i=0; i<a->B->n; i++) {
2384       colors[i] = coloring->colors[larray[i]];
2385     }
2386     ierr = PetscFree(larray);CHKERRQ(ierr);
2387     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2388     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2389     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2390   } else {
2391     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2392   }
2393 
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2399 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2400 {
2401   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2402   int        ierr;
2403 
2404   PetscFunctionBegin;
2405   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2406   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2412 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2413 {
2414   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2415   int        ierr;
2416 
2417   PetscFunctionBegin;
2418   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2419   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422