xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a0452e342fa39dddcd02a770babb2f6ac8b99ab3)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 EXTERN int MatUseSpooles_MPIAIJ(Mat);
15 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but is fast to access.
*/
23 #undef __FUNCT__
24 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
25 int CreateColmap_MPIAIJ_Private(Mat mat)
26 {
27   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
28   int        n = aij->B->n,i,ierr;
29 
30   PetscFunctionBegin;
31 #if defined (PETSC_USE_CTABLE)
32   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
33   for (i=0; i<n; i++){
34     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
35   }
36 #else
37   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
38   PetscLogObjectMemory(mat,mat->N*sizeof(int));
39   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
40   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
41 #endif
42   PetscFunctionReturn(0);
43 }
44 
45 #define CHUNKSIZE   15
46 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
47 { \
48  \
49     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
50     rmax = aimax[row]; nrow = ailen[row];  \
51     col1 = col - shift; \
52      \
53     low = 0; high = nrow; \
54     while (high-low > 5) { \
55       t = (low+high)/2; \
56       if (rp[t] > col) high = t; \
57       else             low  = t; \
58     } \
59       for (_i=low; _i<high; _i++) { \
60         if (rp[_i] > col1) break; \
61         if (rp[_i] == col1) { \
62           if (addv == ADD_VALUES) ap[_i] += value;   \
63           else                  ap[_i] = value; \
64           goto a_noinsert; \
65         } \
66       }  \
67       if (nonew == 1) goto a_noinsert; \
68       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
69       if (nrow >= rmax) { \
70         /* there is no extra room in row, therefore enlarge */ \
71         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
72         PetscScalar *new_a; \
73  \
74         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
75  \
76         /* malloc new storage space */ \
77         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
78         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
79         new_j   = (int*)(new_a + new_nz); \
80         new_i   = new_j + new_nz; \
81  \
82         /* copy over old data into new slots */ \
83         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
84         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
85         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
86         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
87         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
88                                                            len*sizeof(int));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
90         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
91                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
92         /* free up old matrix storage */ \
93  \
94         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
95         if (!a->singlemalloc) { \
96            ierr = PetscFree(a->i);CHKERRQ(ierr); \
97            ierr = PetscFree(a->j);CHKERRQ(ierr); \
98         } \
99         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
100         a->singlemalloc = PETSC_TRUE; \
101  \
102         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
103         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
104         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
105         a->maxnz += CHUNKSIZE; \
106         a->reallocs++; \
107       } \
108       N = nrow++ - 1; a->nz++; \
109       /* shift up all the later entries in this row */ \
110       for (ii=N; ii>=_i; ii--) { \
111         rp[ii+1] = rp[ii]; \
112         ap[ii+1] = ap[ii]; \
113       } \
114       rp[_i] = col1;  \
115       ap[_i] = value;  \
116       a_noinsert: ; \
117       ailen[row] = nrow; \
118 }
119 
120 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
121 { \
122  \
123     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
124     rmax = bimax[row]; nrow = bilen[row];  \
125     col1 = col - shift; \
126      \
127     low = 0; high = nrow; \
128     while (high-low > 5) { \
129       t = (low+high)/2; \
130       if (rp[t] > col) high = t; \
131       else             low  = t; \
132     } \
133        for (_i=low; _i<high; _i++) { \
134         if (rp[_i] > col1) break; \
135         if (rp[_i] == col1) { \
136           if (addv == ADD_VALUES) ap[_i] += value;   \
137           else                  ap[_i] = value; \
138           goto b_noinsert; \
139         } \
140       }  \
141       if (nonew == 1) goto b_noinsert; \
142       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
143       if (nrow >= rmax) { \
144         /* there is no extra room in row, therefore enlarge */ \
145         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
146         PetscScalar *new_a; \
147  \
148         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
149  \
150         /* malloc new storage space */ \
151         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
152         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
153         new_j   = (int*)(new_a + new_nz); \
154         new_i   = new_j + new_nz; \
155  \
156         /* copy over old data into new slots */ \
157         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
158         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
159         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
160         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
161         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
162                                                            len*sizeof(int));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
164         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
165                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
166         /* free up old matrix storage */ \
167  \
168         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
169         if (!b->singlemalloc) { \
170           ierr = PetscFree(b->i);CHKERRQ(ierr); \
171           ierr = PetscFree(b->j);CHKERRQ(ierr); \
172         } \
173         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
174         b->singlemalloc = PETSC_TRUE; \
175  \
176         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
177         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
178         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
179         b->maxnz += CHUNKSIZE; \
180         b->reallocs++; \
181       } \
182       N = nrow++ - 1; b->nz++; \
183       /* shift up all the later entries in this row */ \
184       for (ii=N; ii>=_i; ii--) { \
185         rp[ii+1] = rp[ii]; \
186         ap[ii+1] = ap[ii]; \
187       } \
188       rp[_i] = col1;  \
189       ap[_i] = value;  \
190       b_noinsert: ; \
191       bilen[row] = nrow; \
192 }
193 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds the m x n dense block v into the parallel AIJ matrix.
   Locally owned rows are routed directly into the diagonal block
   (aij->A) or off-diagonal block (aij->B) through the inlined macros
   above; entries in rows owned by other processes are placed in the
   stash and communicated during assembly.

   NOTE(review): `nonew` and `shift` are read once from the diagonal
   block's Mat_SeqAIJ but are also used by
   MatSetValues_SeqAIJ_B_Private(); this assumes A and B were created
   with identical nonew/indexshift options -- confirm.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  /* zero values may be skipped only when adding; inserting a zero must overwrite */
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  /* scratch variables consumed by the MatSetValues_SeqAIJ_{A,B}_Private macros */
  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = a->indexshift;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;                    /* negative rows are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* this process owns the row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          /* column lies in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* column lies in the off-diagonal block */
          if (mat->was_assembled) {
            /* translate global column -> local B column through the colmap */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
	    col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* unknown column in a matrix that allows new nonzeros:
               disassemble back to global numbering so it can be inserted */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];   /* before first assembly B uses global column numbers */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash the whole row segment for assembly-time communication */
      if (!aij->donotstash) {
        if (roworiented) {
          /* NOTE(review): only v[i*n] (resp. v[i]) is tested for zero before
             stashing the whole row -- presumably an accepted approximation */
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatGetValues_MPIAIJ"
282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
283 {
284   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
285   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
286   int        cstart = aij->cstart,cend = aij->cend,row,col;
287 
288   PetscFunctionBegin;
289   for (i=0; i<m; i++) {
290     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
291     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
292     if (idxm[i] >= rstart && idxm[i] < rend) {
293       row = idxm[i] - rstart;
294       for (j=0; j<n; j++) {
295         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
296         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
297         if (idxn[j] >= cstart && idxn[j] < cend){
298           col = idxn[j] - cstart;
299           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
300         } else {
301           if (!aij->colmap) {
302             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
303           }
304 #if defined (PETSC_USE_CTABLE)
305           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
306           col --;
307 #else
308           col = aij->colmap[idxn[j]] - 1;
309 #endif
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
313           }
314         }
315       }
316     } else {
317       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
318     }
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNCT__
324 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
326 {
327   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
328   int         ierr,nstash,reallocs;
329   InsertMode  addv;
330 
331   PetscFunctionBegin;
332   if (aij->donotstash) {
333     PetscFunctionReturn(0);
334   }
335 
336   /* make sure all processors are either in INSERTMODE or ADDMODE */
337   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
338   if (addv == (ADD_VALUES|INSERT_VALUES)) {
339     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
340   }
341   mat->insertmode = addv; /* in case this processor had no cache */
342 
343   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
344   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
345   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
346   PetscFunctionReturn(0);
347 }
348 
349 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes parallel assembly: drains the stash (inserting each
   received row segment locally), assembles the diagonal block, handles
   the collective disassembly protocol for the off-diagonal block, sets
   up the scatter for matrix-vector products on first final assembly,
   and assembles the off-diagonal block.  Collective on Mat.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed entries until every sender has been drained */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD over was_assembled: result is 0 iff any process has was_assembled==0 */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build colmap/garray and the ghost-value scatter */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* any cached MatGetRow() workspace is invalidated by reassembly */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
422 int MatZeroEntries_MPIAIJ(Mat A)
423 {
424   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
425   int        ierr;
426 
427   PetscFunctionBegin;
428   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
429   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the rows of the parallel matrix whose global indices appear in
   `is`, optionally placing *diag on the diagonal of each zeroed row.
   Any process may list any global row, so the routine first routes each
   requested index to its owning process with a hand-rolled all-to-all
   (MPI_Irecv/MPI_Isend), then zeros the locally owned rows in both the
   diagonal (l->A) and off-diagonal (l->B) blocks.  Collective on Mat.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* single allocation: nprocs[j] = #rows for process j, procs[j] = 1 iff any */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    /* linear scan of the ownership ranges to find the owner of row idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  /* NOTE(review): PetscMaxSum_Op appears to produce per-rank {max,count}
     so that work[rank] is the longest incoming message and
     work[size+rank] the number of senders -- confirm against its definition */
  ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  /* each incoming message gets an nmax-sized slot in rvalues */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination (this advances starts[]) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (it was consumed above) and fire off the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];   /* first globally numbered row owned by this process */

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert all received global row numbers to local numbering */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the sequential MatZeroRows can set the diagonal */
    ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert the diagonal
       entries one by one (may create new nonzeros, hence the check) */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatMult_MPIAIJ"
587 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
588 {
589   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
590   int        ierr,nt;
591 
592   PetscFunctionBegin;
593   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
594   if (nt != A->n) {
595     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
596   }
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultAdd_MPIAIJ"
606 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   int        ierr;
610 
611   PetscFunctionBegin;
612   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
613   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
614   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
615   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNCT__
620 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
621 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
622 {
623   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
624   int        ierr;
625 
626   PetscFunctionBegin;
627   /* do nondiagonal part */
628   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
629   /* send it on its way */
630   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   /* do local part */
632   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
633   /* receive remote parts: note this assumes the values are not actually */
634   /* inserted in yy until the next line, which is true for my implementation*/
635   /* but is not perhaps always true. */
636   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637   PetscFunctionReturn(0);
638 }
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
643 {
644   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
645   int        ierr;
646 
647   PetscFunctionBegin;
648   /* do nondiagonal part */
649   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
650   /* send it on its way */
651   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   /* do local part */
653   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
654   /* receive remote parts: note this assumes the values are not actually */
655   /* inserted in yy until the next line, which is true for my implementation*/
656   /* but is not perhaps always true. */
657   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 /*
662   This only works correctly for square matrices where the subblock A->A is the
663    diagonal block
664 */
665 #undef __FUNCT__
666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
668 {
669   int        ierr;
670   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
671 
672   PetscFunctionBegin;
673   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
674   if (a->rstart != a->cstart || a->rend != a->cend) {
675     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
676   }
677   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatScale_MPIAIJ"
683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
684 {
685   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
686   int        ierr;
687 
688   PetscFunctionBegin;
689   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
690   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Releases everything owned by the MPIAIJ implementation: the stash,
   the ownership array, both sequential sub-blocks, the column map, the
   garray translation table, the ghost vector, the scatter context, any
   MatGetRow() workspace, and finally the implementation struct itself.
*/
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
  /* colmap is a hash table or a dense array depending on configuration */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
721 
722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
723 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
727 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
728 {
729   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
730   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
731   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
732   PetscTruth        isdraw,isascii,flg;
733   PetscViewer       sviewer;
734   PetscViewerFormat format;
735 
736   PetscFunctionBegin;
737   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
738   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
739   if (isascii) {
740     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
741     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
742       MatInfo info;
743       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
744       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
745       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
746       if (flg) {
747         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
748 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
749       } else {
750         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
751 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
752       }
753       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
754       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
755       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
756       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
757       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
758       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
759       PetscFunctionReturn(0);
760     } else if (format == PETSC_VIEWER_ASCII_INFO) {
761       PetscFunctionReturn(0);
762     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
763 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
764       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
765 #endif
766 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
767       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
768 #endif
769       PetscFunctionReturn(0);
770     }
771   } else if (isdraw) {
772     PetscDraw       draw;
773     PetscTruth isnull;
774     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
775     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
776   }
777 
778   if (size == 1) {
779     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
780     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
781   } else {
782     /* assemble the entire matrix onto first processor. */
783     Mat         A;
784     Mat_SeqAIJ *Aloc;
785     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
786     PetscScalar *a;
787 
788     if (!rank) {
789       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
790     } else {
791       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
792     }
793     PetscLogObjectParent(mat,A);
794 
795     /* copy over the A part */
796     Aloc = (Mat_SeqAIJ*)aij->A->data;
797     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
798     row = aij->rstart;
799     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
800     for (i=0; i<m; i++) {
801       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
802       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
803     }
804     aj = Aloc->j;
805     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
806 
807     /* copy over the B part */
808     Aloc = (Mat_SeqAIJ*)aij->B->data;
809     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
810     row  = aij->rstart;
811     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
812     ct   = cols;
813     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
814     for (i=0; i<m; i++) {
815       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
816       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
817     }
818     ierr = PetscFree(ct);CHKERRQ(ierr);
819     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
821     /*
822        Everyone has to call to draw the matrix since the graphics waits are
823        synchronized across all processors that share the PetscDraw object
824     */
825     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
826     if (!rank) {
827       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
828       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
829     }
830     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
831     ierr = MatDestroy(A);CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatView_MPIAIJ"
838 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
839 {
840   int        ierr;
841   PetscTruth isascii,isdraw,issocket,isbinary;
842 
843   PetscFunctionBegin;
844   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
845   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
846   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
847   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
848   if (isascii || isdraw || isbinary || issocket) {
849     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
850   } else {
851     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
852   }
853   PetscFunctionReturn(0);
854 }
855 
856 
857 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation for MPIAIJ matrices.  Only the
   "local" sweep variants are supported: in each outer iteration the
   current solution xx is scattered into the work vector mat->lvec, the
   right-hand side is updated to bb1 = bb - B*x (moving the off-process
   coupling onto the right-hand side), and then the sequential relaxation
   of the diagonal block A is applied.  True parallel SOR is not
   implemented and is rejected with an error.
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;              /* work vector for the modified right-hand side */
  PetscScalar  mone=-1.0;        /* used to negate lvec so multadd forms bb - B*x */

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x == 0: no scatter or rhs update needed.
         NOTE(review): this branch passes (lits,lits) for (its,lits) while
         the forward/backward branches pass (lits,PETSC_NULL) -- confirm
         which is intended */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* bring the off-process entries of xx into mat->lvec */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x == 0: relax directly on bb */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
931 
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
   MatGetInfo_MPIAIJ - Returns matrix statistics (nonzero counts, memory,
   malloc counts) for an MPIAIJ matrix by summing the per-block statistics
   of the diagonal (A) and off-diagonal (B) sequential matrices, then, for
   the global flavors, reducing with MPI_MAX or MPI_SUM over the
   communicator.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  /* slots: [0] nz_used [1] nz_allocated [2] nz_unneeded [3] memory [4] mallocs */
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  /* accumulate this process's statistics: A part plus B part */
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* element-wise maximum over all processes */
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    /* element-wise sum over all processes */
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  /* NOTE(review): columns_local is reported as the GLOBAL column count N,
     unlike rows_local which uses the local m -- confirm this asymmetry is
     intended (row-distributed storage does span all columns) */
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatSetOption_MPIAIJ"
983 int MatSetOption_MPIAIJ(Mat A,MatOption op)
984 {
985   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
986   int        ierr;
987 
988   PetscFunctionBegin;
989   switch (op) {
990   case MAT_NO_NEW_NONZERO_LOCATIONS:
991   case MAT_YES_NEW_NONZERO_LOCATIONS:
992   case MAT_COLUMNS_UNSORTED:
993   case MAT_COLUMNS_SORTED:
994   case MAT_NEW_NONZERO_ALLOCATION_ERR:
995   case MAT_KEEP_ZEROED_ROWS:
996   case MAT_NEW_NONZERO_LOCATION_ERR:
997   case MAT_USE_INODES:
998   case MAT_DO_NOT_USE_INODES:
999   case MAT_IGNORE_ZERO_ENTRIES:
1000     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1001     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1002     break;
1003   case MAT_ROW_ORIENTED:
1004     a->roworiented = PETSC_TRUE;
1005     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1006     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1007     break;
1008   case MAT_ROWS_SORTED:
1009   case MAT_ROWS_UNSORTED:
1010   case MAT_YES_NEW_DIAGONALS:
1011   case MAT_USE_SINGLE_PRECISION_SOLVES:
1012     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1013     break;
1014   case MAT_COLUMN_ORIENTED:
1015     a->roworiented = PETSC_FALSE;
1016     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1017     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1018     break;
1019   case MAT_IGNORE_OFF_PROC_ENTRIES:
1020     a->donotstash = PETSC_TRUE;
1021     break;
1022   case MAT_NO_NEW_DIAGONALS:
1023     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1024   default:
1025     SETERRQ(PETSC_ERR_SUP,"unknown option");
1026   }
1027   PetscFunctionReturn(0);
1028 }
1029 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the matrix with
   global column indices in increasing order.  The row is assembled by
   merging the corresponding rows of the diagonal block A and the
   off-diagonal block B; B's compressed column indices are translated back
   to global numbering through the garray table.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time; see MatRestoreRow_MPIAIJ() */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int     max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds the values, followed by the indices */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* request from A and B only the pieces the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes this process's column
           range (cmap[] < cstart) come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A's entries, then B's remaining (high-column) entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying the values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested, so locate the split point here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's local column indices are offset by cstart to global numbering */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1112 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1113 {
1114   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1115 
1116   PetscFunctionBegin;
1117   if (aij->getrowactive == PETSC_FALSE) {
1118     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1119   }
1120   aij->getrowactive = PETSC_FALSE;
1121   PetscFunctionReturn(0);
1122 }
1123 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- or infinity-norm of an
   MPIAIJ matrix from the stored values of the diagonal (A) and
   off-diagonal (B) sequential blocks, combining the per-process
   contributions with an MPI reduction.  The 2-norm is not supported.
   shift (the AIJ index shift, used for Fortran-style indexing) offsets
   every access into the i/j arrays.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* one process: the diagonal block is the whole matrix */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum the squared moduli of every locally stored entry (A then B),
         reduce the sums, and take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-(global)-column absolute sums; note this uses two
         length-N arrays per process, which is not scalable in N */
      PetscReal *tmp,*tmp2;
      int    *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* A's columns are local: offset by cstart to global numbering */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      /* B's compressed columns map to global numbering through garray */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are entirely local, so each row's absolute sum is computed
         locally (A part + B part) and only the maximum is reduced */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1199 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix by
   inserting each locally stored row as a column of a new matrix via
   MatSetValues() and letting the assembly communicate entries to their
   owning processes.  With matout == PETSC_NULL the result replaces A in
   place (square matrices only).
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
  int          ierr,shift = Aloc->indexshift;
  int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat          B;
  PetscScalar  *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* transposed sizes: local n x m, global N x M */
  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift A's local column indices to global numbering ... */
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    /* insert row `row' of A as column `row' of B: note rows/cols swapped */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* ... and undo the shift so A is left unmodified */
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;   /* keep the original pointer for PetscFree(); `cols' is advanced below */
  /* translate the compressed off-diagonal column indices to global numbering */
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place case: A's header takes over B's contents */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1254 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1255 {
1256   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1257   Mat        a = aij->A,b = aij->B;
1258   int        ierr,s1,s2,s3;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1262   if (rr) {
1263     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1264     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1265     /* Overlap communication with computation. */
1266     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1267   }
1268   if (ll) {
1269     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1270     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1271     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1272   }
1273   /* scale  the diagonal block */
1274   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1275 
1276   if (rr) {
1277     /* Do a scatter end and then right scale the off-diagonal block */
1278     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1279     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1280   }
1281 
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1288 int MatPrintHelp_MPIAIJ(Mat A)
1289 {
1290   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1291   int        ierr;
1292 
1293   PetscFunctionBegin;
1294   if (!a->rank) {
1295     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1296   }
1297   PetscFunctionReturn(0);
1298 }
1299 
#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/*
   MatGetBlockSize_MPIAIJ - AIJ (point) storage has no block structure,
   so the block size is always 1.
*/
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1310 int MatSetUnfactored_MPIAIJ(Mat A)
1311 {
1312   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1313   int        ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatEqual_MPIAIJ"
1322 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1323 {
1324   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1325   Mat        a,b,c,d;
1326   PetscTruth flg;
1327   int        ierr;
1328 
1329   PetscFunctionBegin;
1330   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1331   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1332   a = matA->A; b = matA->B;
1333   c = matB->A; d = matB->B;
1334 
1335   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1336   if (flg == PETSC_TRUE) {
1337     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1338   }
1339   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatCopy_MPIAIJ"
1345 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1346 {
1347   int        ierr;
1348   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1349   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1350   PetscTruth flg;
1351 
1352   PetscFunctionBegin;
1353   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1354   if (str != SAME_NONZERO_PATTERN || !flg) {
1355     /* because of the column compression in the off-processor part of the matrix a->B,
1356        the number of columns in a->B and b->B may be different, hence we cannot call
1357        the MatCopy() directly on the two parts. If need be, we can provide a more
1358        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1359        then copying the submatrices */
1360     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1361   } else {
1362     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1363     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/*
   MatSetUpPreallocation_MPIAIJ - Called when the matrix is used before
   the caller supplied any preallocation information; installs the
   default preallocation (PETSC_DEFAULT nonzeros per row, no per-row
   arrays).
*/
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int        ierr;

  PetscFunctionBegin;
  ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1378 
1379 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1380 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1381 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1382 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1383 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1384 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1385 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1386 #endif
1387 
1388 #include "petscblaslapack.h"
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatAXPY_MPIAIJ"
1392 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1393 {
1394   int        ierr,one;
1395   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1396   Mat_SeqAIJ *x,*y;
1397 
1398   PetscFunctionBegin;
1399   if (str == SAME_NONZERO_PATTERN) {
1400     x  = (Mat_SeqAIJ *)xx->A->data;
1401     y  = (Mat_SeqAIJ *)yy->A->data;
1402     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1403     x  = (Mat_SeqAIJ *)xx->B->data;
1404     y  = (Mat_SeqAIJ *)yy->B->data;
1405     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1406   } else {
1407     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1408   }
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 /* -------------------------------------------------------------------*/
1413 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1414        MatGetRow_MPIAIJ,
1415        MatRestoreRow_MPIAIJ,
1416        MatMult_MPIAIJ,
1417        MatMultAdd_MPIAIJ,
1418        MatMultTranspose_MPIAIJ,
1419        MatMultTransposeAdd_MPIAIJ,
1420        0,
1421        0,
1422        0,
1423        0,
1424        0,
1425        0,
1426        MatRelax_MPIAIJ,
1427        MatTranspose_MPIAIJ,
1428        MatGetInfo_MPIAIJ,
1429        MatEqual_MPIAIJ,
1430        MatGetDiagonal_MPIAIJ,
1431        MatDiagonalScale_MPIAIJ,
1432        MatNorm_MPIAIJ,
1433        MatAssemblyBegin_MPIAIJ,
1434        MatAssemblyEnd_MPIAIJ,
1435        0,
1436        MatSetOption_MPIAIJ,
1437        MatZeroEntries_MPIAIJ,
1438        MatZeroRows_MPIAIJ,
1439 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1440        MatLUFactorSymbolic_MPIAIJ_TFS,
1441 #else
1442        0,
1443 #endif
1444        0,
1445        0,
1446        0,
1447        MatSetUpPreallocation_MPIAIJ,
1448        0,
1449        0,
1450        0,
1451        0,
1452        MatDuplicate_MPIAIJ,
1453        0,
1454        0,
1455        0,
1456        0,
1457        MatAXPY_MPIAIJ,
1458        MatGetSubMatrices_MPIAIJ,
1459        MatIncreaseOverlap_MPIAIJ,
1460        MatGetValues_MPIAIJ,
1461        MatCopy_MPIAIJ,
1462        MatPrintHelp_MPIAIJ,
1463        MatScale_MPIAIJ,
1464        0,
1465        0,
1466        0,
1467        MatGetBlockSize_MPIAIJ,
1468        0,
1469        0,
1470        0,
1471        0,
1472        MatFDColoringCreate_MPIAIJ,
1473        0,
1474        MatSetUnfactored_MPIAIJ,
1475        0,
1476        0,
1477        MatGetSubMatrix_MPIAIJ,
1478        MatDestroy_MPIAIJ,
1479        MatView_MPIAIJ,
1480        MatGetPetscMaps_Petsc,
1481        0,
1482        0,
1483        0,
1484        0,
1485        0,
1486        0,
1487        0,
1488        0,
1489        MatSetColoring_MPIAIJ,
1490        MatSetValuesAdic_MPIAIJ,
1491        MatSetValuesAdifor_MPIAIJ
1492 };
1493 
1494 /* ----------------------------------------------------------------------------------------*/
1495 
1496 EXTERN_C_BEGIN
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1499 int MatStoreValues_MPIAIJ(Mat mat)
1500 {
1501   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1502   int        ierr;
1503 
1504   PetscFunctionBegin;
1505   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1506   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
1509 EXTERN_C_END
1510 
1511 EXTERN_C_BEGIN
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1514 int MatRetrieveValues_MPIAIJ(Mat mat)
1515 {
1516   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1517   int        ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1521   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 EXTERN_C_END
1525 
1526 #include "petscpc.h"
1527 EXTERN_C_BEGIN
1528 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1529 EXTERN_C_END
1530 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Registered constructor for the MPIAIJ type: installs
   the function table, splits the global sizes across the communicator,
   builds the row/column ownership tables, and initializes the parallel
   bookkeeping (stash, colmap, scatter context, MatGetRow() buffers).
   The actual A/B blocks are created later, at preallocation time.
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* fill in whichever of the local/global sizes were left PETSC_DECIDE */
  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* rowners and cowners share one allocation of 2*(size+2) ints */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every process's local row count into rowners[1..size] ... */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* ... and turn the counts into cumulative offsets (prefix sum) */
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  /* this process owns global rows rstart (inclusive) to rend (exclusive) */
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same construction for the column ownership */
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* publish the type-specific operations looked up by name elsewhere */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
1610 
1611 #undef __FUNCT__
1612 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1613 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1614 {
1615   Mat        mat;
1616   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1617   int        ierr;
1618 
1619   PetscFunctionBegin;
1620   *newmat       = 0;
1621   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1622   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1623   a    = (Mat_MPIAIJ*)mat->data;
1624   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1625   mat->factor       = matin->factor;
1626   mat->assembled    = PETSC_TRUE;
1627   mat->insertmode   = NOT_SET_VALUES;
1628   mat->preallocated = PETSC_TRUE;
1629 
1630   a->rstart       = oldmat->rstart;
1631   a->rend         = oldmat->rend;
1632   a->cstart       = oldmat->cstart;
1633   a->cend         = oldmat->cend;
1634   a->size         = oldmat->size;
1635   a->rank         = oldmat->rank;
1636   a->donotstash   = oldmat->donotstash;
1637   a->roworiented  = oldmat->roworiented;
1638   a->rowindices   = 0;
1639   a->rowvalues    = 0;
1640   a->getrowactive = PETSC_FALSE;
1641 
1642   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1643   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1644   if (oldmat->colmap) {
1645 #if defined (PETSC_USE_CTABLE)
1646     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1647 #else
1648     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1649     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1650     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1651 #endif
1652   } else a->colmap = 0;
1653   if (oldmat->garray) {
1654     int len;
1655     len  = oldmat->B->n;
1656     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1657     PetscLogObjectMemory(mat,len*sizeof(int));
1658     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1659   } else a->garray = 0;
1660 
1661   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1662   PetscLogObjectParent(mat,a->lvec);
1663   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1664   PetscLogObjectParent(mat,a->Mvctx);
1665   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1666   PetscLogObjectParent(mat,a->A);
1667   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1668   PetscLogObjectParent(mat,a->B);
1669   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1670   *newmat = mat;
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #include "petscsys.h"
1675 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a parallel AIJ matrix from a PETSc binary viewer.

   Process 0 reads the entire file (header, row lengths, column indices,
   numerical values) and ships each other process its own rows with MPI
   point-to-point messages; the other processes only receive.  Rows are
   distributed in contiguous blocks of M/size rows, with the first M%size
   processes receiving one extra row.

   Collective on the viewer's communicator.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* header = [cookie, M, N, ...]; header[3] presumably the total nonzero
       count - a negative value flags a special on-disk format we reject */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  /* broadcast the global sizes to every process */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: block distribution, remainder rows
     go to the low-numbered ranks */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* prefix-sum the local counts so [rowners[p],rowners[p+1]) is proc p's range */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share
     one allocation (offlens points into its second half) */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array since the blocks are contiguous */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* send arguments are significant only at the root */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* verify the message carried exactly the expected number of indices */
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    /* square matrix: column ownership mirrors row ownership */
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix, preallocating with exact diagonal/off-diagonal
     row lengths (ourlens is temporarily reduced to the diagonal part) */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  /* restore ourlens to the full (diag+offdiag) lengths for the insert loops */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix, one local row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out; A->tag keeps these
       messages distinct from the column-index messages sent with 'tag' */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix, one local row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1860 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Extracts a parallel submatrix defined by the row index set isrow and the
  column index set iscol.  csize is the number of columns of the new matrix
  stored locally (PETSC_DECIDE lets this routine choose a block distribution);
  with MAT_REUSE_MATRIX the sequential submatrix saved on a previous call
  (composed on the result as "SubMatrix") is reused.

    Not great since it makes two copies of the submatrix: first a SeqAIJ
  submatrix on each process (via MatGetSubMatrices()), and then the final
  parallel matrix formed by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat          *local,M,Mreuse;
  PetscScalar  *vwork,*aa;
  MPI_Comm     comm = mat->comm;
  Mat_SeqAIJ   *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* fetch the sequential submatrix stashed on the previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* take ownership of the single sequential submatrix; free the array only */
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      /* block distribution: remainder columns go to the low-numbered ranks */
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix-sum of the local counts gives this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; dlens and olens share one allocation */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];   /* nonzeros in row i of the local submatrix */
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* walk the local CSR arrays, copying each row into the parallel matrix */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    /* compose took a reference; drop ours so M owns Mreuse */
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1982 /*@C
1983    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1984    (the default parallel PETSc format).  For good matrix assembly performance
1985    the user should preallocate the matrix storage by setting the parameters
1986    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1987    performance can be increased by more than a factor of 50.
1988 
1989    Collective on MPI_Comm
1990 
1991    Input Parameters:
+  B - the matrix
1993 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1994            (same value is used for all local rows)
1995 .  d_nnz - array containing the number of nonzeros in the various rows of the
1996            DIAGONAL portion of the local submatrix (possibly different for each row)
1997            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1998            The size of this array is equal to the number of local rows, i.e 'm'.
1999            You must leave room for the diagonal entry even if it is zero.
2000 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2001            submatrix (same value is used for all local rows).
2002 -  o_nnz - array containing the number of nonzeros in the various rows of the
2003            OFF-DIAGONAL portion of the local submatrix (possibly different for
2004            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2005            structure. The size of this array is equal to the number
2006            of local rows, i.e 'm'.
2007 
2008    The AIJ format (also called the Yale sparse matrix format or
2009    compressed row storage), is fully compatible with standard Fortran 77
2010    storage.  That is, the stored row and column indices can begin at
2011    either one (as in Fortran) or zero.  See the users manual for details.
2012 
2013    The user MUST specify either the local or global matrix dimensions
2014    (possibly both).
2015 
2016    The parallel matrix is partitioned such that the first m0 rows belong to
2017    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2018    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2019 
2020    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
2022    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2023    first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
2025    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2026 
2027    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2028 
2029    By default, this format uses inodes (identical nodes) when possible.
2030    We search for consecutive rows with the same nonzero structure, thereby
2031    reusing matrix information to achieve increased efficiency.
2032 
2033    Options Database Keys:
2034 +  -mat_aij_no_inode  - Do not use inodes
2035 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2036 -  -mat_aij_oneindex - Internally use indexing starting at 1
2037         rather than 0.  Note that when calling MatSetValues(),
2038         the user still MUST index entries starting at 0!
2039 
2040    Example usage:
2041 
2042    Consider the following 8x8 matrix with 34 non-zero values, that is
2043    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2044    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2045    as follows:
2046 
2047 .vb
2048             1  2  0  |  0  3  0  |  0  4
2049     Proc0   0  5  6  |  7  0  0  |  8  0
2050             9  0 10  | 11  0  0  | 12  0
2051     -------------------------------------
2052            13  0 14  | 15 16 17  |  0  0
2053     Proc1   0 18  0  | 19 20 21  |  0  0
2054             0  0  0  | 22 23  0  | 24  0
2055     -------------------------------------
2056     Proc2  25 26 27  |  0  0 28  | 29  0
2057            30  0  0  | 31 32 33  |  0 34
2058 .ve
2059 
2060    This can be represented as a collection of submatrices as:
2061 
2062 .vb
2063       A B C
2064       D E F
2065       G H I
2066 .ve
2067 
2068    Where the submatrices A,B,C are owned by proc0, D,E,F are
2069    owned by proc1, G,H,I are owned by proc2.
2070 
2071    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2072    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2073    The 'M','N' parameters are 8,8, and have the same values on all procs.
2074 
2075    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2076    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2077    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2078    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2079    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2081 
2082    When d_nz, o_nz parameters are specified, d_nz storage elements are
2083    allocated for every row of the local diagonal submatrix, and o_nz
2084    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
2086    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2087    In this case, the values of d_nz,o_nz are:
2088 .vb
2089      proc0 : dnz = 2, o_nz = 2
2090      proc1 : dnz = 3, o_nz = 2
2091      proc2 : dnz = 1, o_nz = 4
2092 .ve
2093    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2094    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2096    34 values.
2097 
2098    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2100    In the above case the values for d_nnz,o_nnz are:
2101 .vb
2102      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2103      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2104      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2105 .ve
2106    Here the space allocated is sum of all the above values i.e 34, and
2107    hence pre-allocation is perfect.
2108 
2109    Level: intermediate
2110 
2111 .keywords: matrix, aij, compressed row, sparse, parallel
2112 
2113 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2114 @*/
2115 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2116 {
2117   Mat_MPIAIJ   *b;
2118   int          ierr,i;
2119   PetscTruth   flg2;
2120 
2121   PetscFunctionBegin;
2122   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2123   if (!flg2) PetscFunctionReturn(0);
2124   B->preallocated = PETSC_TRUE;
2125   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2126   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2127   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2128   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2129   if (d_nnz) {
2130     for (i=0; i<B->m; i++) {
2131       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2132     }
2133   }
2134   if (o_nnz) {
2135     for (i=0; i<B->m; i++) {
2136       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2137     }
2138   }
2139   b = (Mat_MPIAIJ*)B->data;
2140 
2141   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2142   PetscLogObjectParent(B,b->A);
2143   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2144   PetscLogObjectParent(B,b->B);
2145 
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatCreateMPIAIJ"
2151 /*@C
2152    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2153    (the default parallel PETSc format).  For good matrix assembly performance
2154    the user should preallocate the matrix storage by setting the parameters
2155    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2156    performance can be increased by more than a factor of 50.
2157 
2158    Collective on MPI_Comm
2159 
2160    Input Parameters:
2161 +  comm - MPI communicator
2162 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2163            This value should be the same as the local size used in creating the
2164            y vector for the matrix-vector product y = Ax.
2165 .  n - This value should be the same as the local size used in creating the
2166        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2167        calculated if N is given) For square matrices n is almost always m.
2168 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2169 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2170 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2171            (same value is used for all local rows)
2172 .  d_nnz - array containing the number of nonzeros in the various rows of the
2173            DIAGONAL portion of the local submatrix (possibly different for each row)
2174            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2175            The size of this array is equal to the number of local rows, i.e 'm'.
2176            You must leave room for the diagonal entry even if it is zero.
2177 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2178            submatrix (same value is used for all local rows).
2179 -  o_nnz - array containing the number of nonzeros in the various rows of the
2180            OFF-DIAGONAL portion of the local submatrix (possibly different for
2181            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2182            structure. The size of this array is equal to the number
2183            of local rows, i.e 'm'.
2184 
2185    Output Parameter:
2186 .  A - the matrix
2187 
2188    Notes:
2189    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2190    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2191    storage requirements for this matrix.
2192 
2193    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2194    processor than it must be used on all processors that share the object for
2195    that argument.
2196 
2197    The AIJ format (also called the Yale sparse matrix format or
2198    compressed row storage), is fully compatible with standard Fortran 77
2199    storage.  That is, the stored row and column indices can begin at
2200    either one (as in Fortran) or zero.  See the users manual for details.
2201 
2202    The user MUST specify either the local or global matrix dimensions
2203    (possibly both).
2204 
2205    The parallel matrix is partitioned such that the first m0 rows belong to
2206    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2207    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2208 
2209    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
2211    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2212    first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
2214    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2215 
2216    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2217 
2218    By default, this format uses inodes (identical nodes) when possible.
2219    We search for consecutive rows with the same nonzero structure, thereby
2220    reusing matrix information to achieve increased efficiency.
2221 
2222    Options Database Keys:
2223 +  -mat_aij_no_inode  - Do not use inodes
2224 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2225 -  -mat_aij_oneindex - Internally use indexing starting at 1
2226         rather than 0.  Note that when calling MatSetValues(),
2227         the user still MUST index entries starting at 0!
2228 
2229 
2230    Example usage:
2231 
2232    Consider the following 8x8 matrix with 34 non-zero values, that is
2233    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2234    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2235    as follows:
2236 
2237 .vb
2238             1  2  0  |  0  3  0  |  0  4
2239     Proc0   0  5  6  |  7  0  0  |  8  0
2240             9  0 10  | 11  0  0  | 12  0
2241     -------------------------------------
2242            13  0 14  | 15 16 17  |  0  0
2243     Proc1   0 18  0  | 19 20 21  |  0  0
2244             0  0  0  | 22 23  0  | 24  0
2245     -------------------------------------
2246     Proc2  25 26 27  |  0  0 28  | 29  0
2247            30  0  0  | 31 32 33  |  0 34
2248 .ve
2249 
2250    This can be represented as a collection of submatrices as:
2251 
2252 .vb
2253       A B C
2254       D E F
2255       G H I
2256 .ve
2257 
2258    Where the submatrices A,B,C are owned by proc0, D,E,F are
2259    owned by proc1, G,H,I are owned by proc2.
2260 
2261    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2262    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2263    The 'M','N' parameters are 8,8, and have the same values on all procs.
2264 
2265    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2266    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2267    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2268    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2269    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2271 
2272    When d_nz, o_nz parameters are specified, d_nz storage elements are
2273    allocated for every row of the local diagonal submatrix, and o_nz
2274    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
2276    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2277    In this case, the values of d_nz,o_nz are:
2278 .vb
2279      proc0 : dnz = 2, o_nz = 2
2280      proc1 : dnz = 3, o_nz = 2
2281      proc2 : dnz = 1, o_nz = 4
2282 .ve
2283    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2284    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2286    34 values.
2287 
2288    When d_nnz, o_nnz parameters are specified, the storage is specified
2289    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2290    In the above case the values for d_nnz,o_nnz are:
2291 .vb
2292      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2293      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2294      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2295 .ve
2296    Here the space allocated is sum of all the above values i.e 34, and
2297    hence pre-allocation is perfect.
2298 
2299    Level: intermediate
2300 
2301 .keywords: matrix, aij, compressed row, sparse, parallel
2302 
2303 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2304 @*/
2305 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2306 {
2307   int ierr,size;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2311   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2312   if (size > 1) {
2313     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2314     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2315   } else {
2316     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2317     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2318   }
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2324 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2325 {
2326   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2327   PetscFunctionBegin;
2328   *Ad     = a->A;
2329   *Ao     = a->B;
2330   *colmap = a->garray;
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #undef __FUNCT__
2335 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2336 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2337 {
2338   int        ierr;
2339   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2340 
2341   PetscFunctionBegin;
2342   if (coloring->ctype == IS_COLORING_LOCAL) {
2343     int        *allcolors,*colors,i;
2344     ISColoring ocoloring;
2345 
2346     /* set coloring for diagonal portion */
2347     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2348 
2349     /* set coloring for off-diagonal portion */
2350     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2351     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2352     for (i=0; i<a->B->n; i++) {
2353       colors[i] = allcolors[a->garray[i]];
2354     }
2355     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2356     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2357     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2358     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2359   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2360     int        *colors,i,*larray;
2361     ISColoring ocoloring;
2362 
2363     /* set coloring for diagonal portion */
2364     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2365     for (i=0; i<a->A->n; i++) {
2366       larray[i] = i + a->cstart;
2367     }
2368     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2369     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2370     for (i=0; i<a->A->n; i++) {
2371       colors[i] = coloring->colors[larray[i]];
2372     }
2373     ierr = PetscFree(larray);CHKERRQ(ierr);
2374     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2375     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2376     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2377 
2378     /* set coloring for off-diagonal portion */
2379     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2380     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2381     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2382     for (i=0; i<a->B->n; i++) {
2383       colors[i] = coloring->colors[larray[i]];
2384     }
2385     ierr = PetscFree(larray);CHKERRQ(ierr);
2386     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2387     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2388     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2389   } else {
2390     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2391   }
2392 
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2398 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2399 {
2400   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2401   int        ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2405   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2411 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2412 {
2413   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2414   int        ierr;
2415 
2416   PetscFunctionBegin;
2417   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2418   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421