xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 95d5f7c29374efcfd2ca44c2fe93981fbc2b4454)
1 /*$Id: mpiaij.c,v 1.312 2000/04/09 04:36:04 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 extern int MatSetUpMultiply_MPIAIJ(Mat);
8 extern int DisAssemble_MPIAIJ(Mat);
9 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 extern int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash-table cost; without it, it is not scalable (each
19 processor holds an order-N integer array) but access is fast.
20 */
21 #undef __FUNC__
22 #define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int*)PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
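
/*
   Illustrative sketch (not part of the library) of how the colmap built above
   is consulted to turn a global column number gcol into a local column index
   of the off-diagonal block; this mirrors the lookup done in
   MatSetValues_MPIAIJ() below.  The name gcol is hypothetical.

       int col,ierr;
   #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
       col--;                        the table stores (local index + 1)
   #else
       col = aij->colmap[gcol] - 1;  the array stores (local index + 1); 0 means absent
   #endif
       if (col < 0) { ... gcol has no entry in the off-diagonal block ... }
*/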
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col1) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<a->m+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col1) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<b->m+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNC__
194 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
198   Scalar     value;
199   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int        cstart = aij->cstart,cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
206   int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208   PetscTruth ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE:PETSC_FALSE);
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
211   int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         }
234         else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_BOPT_g)
236         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!aij->colmap) {
241               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined (PETSC_USE_CTABLE)
244             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
245 	    col--;
246 #else
247             col = aij->colmap[in[j]] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
250               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
251               col =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
253               B = aij->B;
254               b = (Mat_SeqAIJ*)B->data;
255               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
256               ba = b->a;
257             }
258           } else col = in[j];
259           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
260           if (ignorezeroentries && value == 0.0) continue;
261           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         }
264       }
265     } else {
266       if (!aij->donotstash) {
267         if (roworiented) {
268           if (ignorezeroentries && v[i*n] == 0.0) continue;
269           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
270         } else {
271           if (ignorezeroentries && v[i] == 0.0) continue;
272           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
273         }
274       }
275     }
276   }
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNC__
281 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIAIJ"
282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
283 {
284   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
285   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
286   int        cstart = aij->cstart,cend = aij->cend,row,col;
287 
288   PetscFunctionBegin;
289   for (i=0; i<m; i++) {
290     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
291     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
292     if (idxm[i] >= rstart && idxm[i] < rend) {
293       row = idxm[i] - rstart;
294       for (j=0; j<n; j++) {
295         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
296         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
297         if (idxn[j] >= cstart && idxn[j] < cend){
298           col = idxn[j] - cstart;
299           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
300         } else {
301           if (!aij->colmap) {
302             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
303           }
304 #if defined (PETSC_USE_CTABLE)
305           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
306           col --;
307 #else
308           col = aij->colmap[idxn[j]] - 1;
309 #endif
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
313           }
314         }
315       }
316     } else {
317       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
318     }
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNC__
324 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIAIJ"
325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
326 {
327   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
328   int         ierr,nstash,reallocs;
329   InsertMode  addv;
330 
331   PetscFunctionBegin;
332   if (aij->donotstash) {
333     PetscFunctionReturn(0);
334   }
335 
336   /* make sure all processors are either in INSERTMODE or ADDMODE */
337   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
338   if (addv == (ADD_VALUES|INSERT_VALUES)) {
339     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
340   }
341   mat->insertmode = addv; /* in case this processor had no cache */
342 
343   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
344   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
345   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
346   PetscFunctionReturn(0);
347 }
348 
349 
350 #undef __FUNC__
351 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIAIJ"
352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
353 {
354   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
355   int         i,j,rstart,ncols,n,ierr,flg;
356   int         *row,*col,other_disassembled;
357   Scalar      *val;
358   InsertMode  addv = mat->insertmode;
359 
360   PetscFunctionBegin;
361   if (!aij->donotstash) {
362     while (1) {
363       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
364       if (!flg) break;
365 
366       for (i=0; i<n;) {
367         /* Now identify the consecutive vals belonging to the same row */
368         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
369         if (j < n) ncols = j-i;
370         else       ncols = n-i;
371         /* Now assemble all these values with a single function call */
372         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
373         i = j;
374       }
375     }
376     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
377   }
378 
379   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
380   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
381 
382   /* determine if any processor has disassembled; if so we must
383      also disassemble ourselves, so that we may reassemble. */
384   /*
385      if the nonzero structure of submatrix B cannot change then we know that
386      no processor disassembled, thus we can skip this step
387   */
388   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
389     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
390     if (mat->was_assembled && !other_disassembled) {
391       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
392     }
393   }
394 
395   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
396     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
397   }
398   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
400 
401   if (aij->rowvalues) {
402     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
403     aij->rowvalues = 0;
404   }
405   PetscFunctionReturn(0);
406 }
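
/*
   Minimal usage sketch (illustrative only) of the assembly cycle completed
   above: entries for locally owned rows go directly into aij->A or aij->B
   inside MatSetValues(), while entries destined for rows owned by other
   processes sit in mat->stash until they are shipped to their owners by the
   MatAssemblyBegin()/MatAssemblyEnd() pair.  Sizes and values below are
   made up for illustration.

       Mat    mat;
       int    ierr,row = 0,col = 0;
       Scalar value = 1.0;
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,
                              5,PETSC_NULL,2,PETSC_NULL,&mat);CHKERRQ(ierr);
       ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(mat);CHKERRQ(ierr);
*/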
407 
408 #undef __FUNC__
409 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIAIJ"
410 int MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   int        ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNC__
422 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIAIJ"
423 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
427   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
428   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
429   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
430   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
431   MPI_Comm       comm = A->comm;
432   MPI_Request    *send_waits,*recv_waits;
433   MPI_Status     recv_status,*send_status;
434   IS             istmp;
435 
436   PetscFunctionBegin;
437   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
438   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
439 
440   /*  first count number of contributors to each processor */
441   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
442   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
443   procs  = nprocs + size;
444   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
445   for (i=0; i<N; i++) {
446     idx = rows[i];
447     found = 0;
448     for (j=0; j<size; j++) {
449       if (idx >= owners[j] && idx < owners[j+1]) {
450         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
451       }
452     }
453     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
454   }
455   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
456 
457   /* inform other processors of number of messages and max length*/
458   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
459   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
460   nrecvs = work[size+rank];
461   nmax   = work[rank];
462   ierr   = PetscFree(work);CHKERRQ(ierr);
463 
464   /* post receives:   */
465   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
466   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
467   for (i=0; i<nrecvs; i++) {
468     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
469   }
470 
471   /* do sends:
472       1) starts[i] gives the starting index in svalues for stuff going to
473          the ith processor
474   */
475   svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
476   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
477   starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
478   starts[0] = 0;
479   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
480   for (i=0; i<N; i++) {
481     svalues[starts[owner[i]]++] = rows[i];
482   }
483   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
484 
485   starts[0] = 0;
486   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   count = 0;
488   for (i=0; i<size; i++) {
489     if (procs[i]) {
490       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
491     }
492   }
493   ierr = PetscFree(starts);CHKERRQ(ierr);
494 
495   base = owners[rank];
496 
497   /*  wait on receives */
498   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
499   source = lens + nrecvs;
500   count  = nrecvs; slen = 0;
501   while (count) {
502     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
503     /* unpack receives into our local space */
504     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
505     source[imdex]  = recv_status.MPI_SOURCE;
506     lens[imdex]    = n;
507     slen          += n;
508     count--;
509   }
510   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
511 
512   /* move the data into the send scatter */
513   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
514   count = 0;
515   for (i=0; i<nrecvs; i++) {
516     values = rvalues + i*nmax;
517     for (j=0; j<lens[i]; j++) {
518       lrows[count++] = values[j] - base;
519     }
520   }
521   ierr = PetscFree(rvalues);CHKERRQ(ierr);
522   ierr = PetscFree(lens);CHKERRQ(ierr);
523   ierr = PetscFree(owner);CHKERRQ(ierr);
524   ierr = PetscFree(nprocs);CHKERRQ(ierr);
525 
526   /* actually zap the local rows */
527   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
528   PLogObjectParent(A,istmp);
529 
530   /*
531         Zero the required rows. If the "diagonal block" of the matrix
532      is square and the user wishes to set the diagonal we use separate
533      code so that MatSetValues() is not called for each diagonal entry, which
534      would allocate new memory each time, causing many mallocs and slowing things down.
535 
536        Contributed by: Mathew Knepley
537   */
538   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
539   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
540   if (diag && (l->A->M == l->A->N)) {
541     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
542   } else if (diag) {
543     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
544     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
545       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
547     }
548     for (i = 0; i < slen; i++) {
549       row  = lrows[i] + rstart;
550       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
551     }
552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554   } else {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556   }
557   ierr = ISDestroy(istmp);CHKERRQ(ierr);
558   ierr = PetscFree(lrows);CHKERRQ(ierr);
559 
560   /* wait on sends */
561   if (nsends) {
562     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
563     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
564     ierr        = PetscFree(send_status);CHKERRQ(ierr);
565   }
566   ierr = PetscFree(send_waits);CHKERRQ(ierr);
567   ierr = PetscFree(svalues);CHKERRQ(ierr);
568 
569   PetscFunctionReturn(0);
570 }
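
/*
   Illustrative sketch (not part of the library) of the user-level call that
   reaches the routine above: the global row numbers to be zeroed are gathered
   in an index set and passed to MatZeroRows(), optionally with a value to put
   on the diagonal of those rows (pass PETSC_NULL to leave the rows completely
   zero).  The rows[] contents are made up.

       IS     is;
       int    ierr,rows[2] = {0,5};
       Scalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);
*/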
571 
572 #undef __FUNC__
573 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIAIJ"
574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
575 {
576   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
577   int        ierr,nt;
578 
579   PetscFunctionBegin;
580   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
581   if (nt != a->n) {
582     SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt);
583   }
584   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
586   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNC__
592 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIAIJ"
593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
594 {
595   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
596   int        ierr;
597 
598   PetscFunctionBegin;
599   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
601   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 #undef __FUNC__
607 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIAIJ"
608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
609 {
610   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
611   int        ierr;
612 
613   PetscFunctionBegin;
614   /* do nondiagonal part */
615   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
616   /* send it on its way */
617   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
618   /* do local part */
619   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
620   /* receive remote parts: note this assumes the values are not actually */
621   /* inserted in yy until the next line, which is true for my implementation */
622   /* but may not always be true. */
623   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNC__
628 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIAIJ"
629 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
630 {
631   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
632   int        ierr;
633 
634   PetscFunctionBegin;
635   /* do nondiagonal part */
636   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
637   /* send it on its way */
638   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
639   /* do local part */
640   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
641   /* receive remote parts: note this assumes the values are not actually */
642   /* inserted in zz until the next line, which is true for my implementation */
643   /* but may not always be true. */
644   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
645   PetscFunctionReturn(0);
646 }
647 
648 /*
649   This only works correctly for square matrices where the subblock A->A is the
650    diagonal block
651 */
652 #undef __FUNC__
653 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIAIJ"
654 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
655 {
656   int        ierr;
657   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
658 
659   PetscFunctionBegin;
660   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
661   if (a->rstart != a->cstart || a->rend != a->cend) {
662     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
663   }
664   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
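
/*
   Illustrative sketch (not part of the library): the routine above fills a
   parallel vector whose layout must match the row partition of the matrix.
   A caller could, for example, create the vector with the same local size as
   the local row block (a->m rows on this process):

       Vec diag;
       int ierr;
       ierr = VecCreateMPI(A->comm,a->m,a->M,&diag);CHKERRQ(ierr);
       ierr = MatGetDiagonal(A,diag);CHKERRQ(ierr);
*/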
667 
668 #undef __FUNC__
669 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIAIJ"
670 int MatScale_MPIAIJ(Scalar *aa,Mat A)
671 {
672   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
673   int        ierr;
674 
675   PetscFunctionBegin;
676   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
677   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNC__
682 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIAIJ"
683 int MatDestroy_MPIAIJ(Mat mat)
684 {
685   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
686   int        ierr;
687 
688   PetscFunctionBegin;
689 
690   if (mat->mapping) {
691     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
692   }
693   if (mat->bmapping) {
694     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
695   }
696   if (mat->rmap) {
697     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
698   }
699   if (mat->cmap) {
700     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
701   }
702 #if defined(PETSC_USE_LOG)
703   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
704 #endif
705   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
706   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
707   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
708   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
709 #if defined (PETSC_USE_CTABLE)
710   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
711 #else
712   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
713 #endif
714   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
715   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
716   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
717   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
718   ierr = PetscFree(aij);CHKERRQ(ierr);
719   PLogObjectDestroy(mat);
720   PetscHeaderDestroy(mat);
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNC__
725 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_Binary"
726 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
727 {
728   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
729   int         ierr;
730 
731   PetscFunctionBegin;
732   if (aij->size == 1) {
733     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
734   }
735   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNC__
740 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_ASCIIorDraworSocket"
741 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
742 {
743   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
745   int         ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size;
746   PetscTruth  isdraw,isascii,flg;
747 
748   PetscFunctionBegin;
749   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
750   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
751   if (isascii) {
752     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
753     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
754       MatInfo info;
755       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
757       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
758       if (flg) {
759         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
760 					      rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
761       } else {
762         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
763 		    rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
764       }
765       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
766       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
767       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
768       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
769       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
770       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
771       PetscFunctionReturn(0);
772     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
773       PetscFunctionReturn(0);
774     }
775   } else if (isdraw) {
776     Draw       draw;
777     PetscTruth isnull;
778     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
779     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
780   }
781 
782   if (size == 1) {
783     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
784   } else {
785     /* assemble the entire matrix onto first processor. */
786     Mat         A;
787     Mat_SeqAIJ *Aloc;
788     int         M = aij->M,N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
789     Scalar      *a;
790 
791     if (!rank) {
792       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
793     } else {
794       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
795     }
796     PLogObjectParent(mat,A);
797 
798     /* copy over the A part */
799     Aloc = (Mat_SeqAIJ*)aij->A->data;
800     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
801     row = aij->rstart;
802     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
803     for (i=0; i<m; i++) {
804       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
805       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
806     }
807     aj = Aloc->j;
808     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
809 
810     /* copy over the B part */
811     Aloc = (Mat_SeqAIJ*)aij->B->data;
812     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
813     row = aij->rstart;
814     ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols);
815     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
816     for (i=0; i<m; i++) {
817       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
818       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
819     }
820     ierr = PetscFree(ct);CHKERRQ(ierr);
821     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     /*
824        Everyone has to participate in this call to draw the matrix, since the
825        graphics waits are synchronized across all processors that share the Draw object
826     */
827     if (!rank) {
828       Viewer sviewer;
829       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
830       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
831       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
832     }
833     ierr = MatDestroy(A);CHKERRQ(ierr);
834   }
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNC__
839 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ"
840 int MatView_MPIAIJ(Mat mat,Viewer viewer)
841 {
842   int        ierr;
843   PetscTruth isascii,isdraw,issocket,isbinary;
844 
845   PetscFunctionBegin;
846   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
847   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
848   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
849   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
850   if (isascii || isdraw || isbinary || issocket) {
851     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
852   } else {
853     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
854   }
855   PetscFunctionReturn(0);
856 }
857 
858 /*
859     This has to provide several versions.
860 
861      2) a) use only local smoothing, updating outer values only once.
862         b) local smoothing, updating outer values each inner iteration
863      3) coloring, updating outer values between colors.
864 */
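
/*
   Illustrative sketch (not part of the library) of how the local sweeps
   supported below are requested; the MatSORType flags combine a sweep choice
   with SOR_ZERO_INITIAL_GUESS when the initial guess is zero.  Arguments are
   illustrative (omega = 1.0, fshift = 0.0, one iteration).

       ierr = MatRelax(mat,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,1,x);CHKERRQ(ierr);
*/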
865 #undef __FUNC__
866 #define __FUNC__ /*<a name=""></a>*/"MatRelax_MPIAIJ"
867 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,
868                            PetscReal fshift,int its,Vec xx)
869 {
870   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
871   Mat        AA = mat->A,BB = mat->B;
872   Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
873   Scalar     *b,*x,*xs,*ls,d,*v,sum;
874   int        ierr,*idx,*diag;
875   int        n = mat->n,m = mat->m,i,shift = A->indexshift;
876 
877   PetscFunctionBegin;
878   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
879   diag = A->diag;
880   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
881     if (flag & SOR_ZERO_INITIAL_GUESS) {
882       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
883       PetscFunctionReturn(0);
884     }
885     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
886     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
887     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
888     if (xx != bb) {
889       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
890     } else {
891       b = x;
892     }
893     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
894     xs = x + shift; /* shift by one for index start of 1 */
895     ls = ls + shift;
896     while (its--) {
897       /* go down through the rows */
898       for (i=0; i<m; i++) {
899         n    = A->i[i+1] - A->i[i];
900 	PLogFlops(4*n+3);
901         idx  = A->j + A->i[i] + shift;
902         v    = A->a + A->i[i] + shift;
903         sum  = b[i];
904         SPARSEDENSEMDOT(sum,xs,v,idx,n);
905         d    = fshift + A->a[diag[i]+shift];
906         n    = B->i[i+1] - B->i[i];
907         idx  = B->j + B->i[i] + shift;
908         v    = B->a + B->i[i] + shift;
909         SPARSEDENSEMDOT(sum,ls,v,idx,n);
910         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
911       }
912       /* come up through the rows */
913       for (i=m-1; i>-1; i--) {
914         n    = A->i[i+1] - A->i[i];
915 	PLogFlops(4*n+3);
916         idx  = A->j + A->i[i] + shift;
917         v    = A->a + A->i[i] + shift;
918         sum  = b[i];
919         SPARSEDENSEMDOT(sum,xs,v,idx,n);
920         d    = fshift + A->a[diag[i]+shift];
921         n    = B->i[i+1] - B->i[i];
922         idx  = B->j + B->i[i] + shift;
923         v    = B->a + B->i[i] + shift;
924         SPARSEDENSEMDOT(sum,ls,v,idx,n);
925         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
926       }
927     }
928     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
930     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
931   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
932     if (flag & SOR_ZERO_INITIAL_GUESS) {
933       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
934       PetscFunctionReturn(0);
935     }
936     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
937     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
938     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
939     if (xx != bb) {
940       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
941     } else {
942       b = x;
943     }
944     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
945     xs = x + shift; /* shift by one for index start of 1 */
946     ls = ls + shift;
947     while (its--) {
948       for (i=0; i<m; i++) {
949         n    = A->i[i+1] - A->i[i];
950 	PLogFlops(4*n+3);
951         idx  = A->j + A->i[i] + shift;
952         v    = A->a + A->i[i] + shift;
953         sum  = b[i];
954         SPARSEDENSEMDOT(sum,xs,v,idx,n);
955         d    = fshift + A->a[diag[i]+shift];
956         n    = B->i[i+1] - B->i[i];
957         idx  = B->j + B->i[i] + shift;
958         v    = B->a + B->i[i] + shift;
959         SPARSEDENSEMDOT(sum,ls,v,idx,n);
960         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
961       }
962     }
963     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
965     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
966   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
967     if (flag & SOR_ZERO_INITIAL_GUESS) {
968       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
969       PetscFunctionReturn(0);
970     }
971     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
972     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
973     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
974     if (xx != bb) {
975       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
976     } else {
977       b = x;
978     }
979     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
980     xs = x + shift; /* shift by one for index start of 1 */
981     ls = ls + shift;
982     while (its--) {
983       for (i=m-1; i>-1; i--) {
984         n    = A->i[i+1] - A->i[i];
985 	PLogFlops(4*n+3);
986         idx  = A->j + A->i[i] + shift;
987         v    = A->a + A->i[i] + shift;
988         sum  = b[i];
989         SPARSEDENSEMDOT(sum,xs,v,idx,n);
990         d    = fshift + A->a[diag[i]+shift];
991         n    = B->i[i+1] - B->i[i];
992         idx  = B->j + B->i[i] + shift;
993         v    = B->a + B->i[i] + shift;
994         SPARSEDENSEMDOT(sum,ls,v,idx,n);
995         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
996       }
997     }
998     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
999     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
1000     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1001   } else {
1002     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNC__
1008 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIAIJ"
1009 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1010 {
1011   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1012   Mat        A = mat->A,B = mat->B;
1013   int        ierr;
1014   PetscReal  isend[5],irecv[5];
1015 
1016   PetscFunctionBegin;
1017   info->block_size     = 1.0;
1018   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1019   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1020   isend[3] = info->memory;  isend[4] = info->mallocs;
1021   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1022   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1023   isend[3] += info->memory;  isend[4] += info->mallocs;
1024   if (flag == MAT_LOCAL) {
1025     info->nz_used      = isend[0];
1026     info->nz_allocated = isend[1];
1027     info->nz_unneeded  = isend[2];
1028     info->memory       = isend[3];
1029     info->mallocs      = isend[4];
1030   } else if (flag == MAT_GLOBAL_MAX) {
1031     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1032     info->nz_used      = irecv[0];
1033     info->nz_allocated = irecv[1];
1034     info->nz_unneeded  = irecv[2];
1035     info->memory       = irecv[3];
1036     info->mallocs      = irecv[4];
1037   } else if (flag == MAT_GLOBAL_SUM) {
1038     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1039     info->nz_used      = irecv[0];
1040     info->nz_allocated = irecv[1];
1041     info->nz_unneeded  = irecv[2];
1042     info->memory       = irecv[3];
1043     info->mallocs      = irecv[4];
1044   }
1045   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1046   info->fill_ratio_needed = 0;
1047   info->factor_mallocs    = 0;
1048   info->rows_global       = (double)mat->M;
1049   info->columns_global    = (double)mat->N;
1050   info->rows_local        = (double)mat->m;
1051   info->columns_local     = (double)mat->n;
1052 
1053   PetscFunctionReturn(0);
1054 }
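
/*
   Illustrative sketch (not part of the library): a caller obtains the summed
   (or maximum, or purely local) statistics computed above with

       MatInfo info;
       int     ierr;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(mat->comm,"nonzeros used %d\n",(int)info.nz_used);CHKERRQ(ierr);

   MAT_LOCAL and MAT_GLOBAL_MAX select the other two branches above.
*/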
1055 
1056 #undef __FUNC__
1057 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIAIJ"
1058 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1059 {
1060   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1061   int        ierr;
1062 
1063   PetscFunctionBegin;
1064   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1065       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1066       op == MAT_COLUMNS_UNSORTED ||
1067       op == MAT_COLUMNS_SORTED ||
1068       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1069       op == MAT_KEEP_ZEROED_ROWS ||
1070       op == MAT_NEW_NONZERO_LOCATION_ERR ||
1071       op == MAT_USE_INODES ||
1072       op == MAT_DO_NOT_USE_INODES ||
1073       op == MAT_IGNORE_ZERO_ENTRIES) {
1074         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1075         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1076   } else if (op == MAT_ROW_ORIENTED) {
1077     a->roworiented = PETSC_TRUE;
1078     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1079     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1080   } else if (op == MAT_ROWS_SORTED ||
1081              op == MAT_ROWS_UNSORTED ||
1082              op == MAT_SYMMETRIC ||
1083              op == MAT_STRUCTURALLY_SYMMETRIC ||
1084              op == MAT_YES_NEW_DIAGONALS) {
1085     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1086   } else if (op == MAT_COLUMN_ORIENTED) {
1087     a->roworiented = PETSC_FALSE;
1088     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1089     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1090   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1091     a->donotstash = PETSC_TRUE;
1092   } else if (op == MAT_NO_NEW_DIAGONALS){
1093     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1094   } else {
1095     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1096   }
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNC__
1101 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIAIJ"
1102 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1103 {
1104   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1105 
1106   PetscFunctionBegin;
1107   if (m) *m = mat->M;
1108   if (n) *n = mat->N;
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNC__
1113 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIAIJ"
1114 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1115 {
1116   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1117 
1118   PetscFunctionBegin;
1119   if (m) *m = mat->m;
1120   if (n) *n = mat->n;
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 #undef __FUNC__
1125 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIAIJ"
1126 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1127 {
1128   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1129 
1130   PetscFunctionBegin;
1131   if (m) *m = mat->rstart;
1132   if (n) *n = mat->rend;
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNC__
1137 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIAIJ"
1138 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1139 {
1140   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1141   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1142   int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1143   int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1144   int        *cmap,*idx_p;
1145 
1146   PetscFunctionBegin;
1147   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1148   mat->getrowactive = PETSC_TRUE;
1149 
1150   if (!mat->rowvalues && (idx || v)) {
1151     /*
1152         allocate enough space to hold information from the longest row.
1153     */
1154     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1155     int     max = 1,m = mat->m,tmp;
1156     for (i=0; i<m; i++) {
1157       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1158       if (max < tmp) { max = tmp; }
1159     }
1160     mat->rowvalues  = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1161     mat->rowindices = (int*)(mat->rowvalues + max);
1162   }
1163 
1164   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1165   lrow = row - rstart;
1166 
1167   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1168   if (!v)   {pvA = 0; pvB = 0;}
1169   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1170   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1171   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1172   nztot = nzA + nzB;
1173 
1174   cmap  = mat->garray;
1175   if (v  || idx) {
1176     if (nztot) {
1177       /* Sort by increasing column numbers, assuming A and B already sorted */
1178       int imark = -1;
1179       if (v) {
1180         *v = v_p = mat->rowvalues;
1181         for (i=0; i<nzB; i++) {
1182           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1183           else break;
1184         }
1185         imark = i;
1186         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1187         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1188       }
1189       if (idx) {
1190         *idx = idx_p = mat->rowindices;
1191         if (imark > -1) {
1192           for (i=0; i<imark; i++) {
1193             idx_p[i] = cmap[cworkB[i]];
1194           }
1195         } else {
1196           for (i=0; i<nzB; i++) {
1197             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1198             else break;
1199           }
1200           imark = i;
1201         }
1202         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1203         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1204       }
1205     } else {
1206       if (idx) *idx = 0;
1207       if (v)   *v   = 0;
1208     }
1209   }
1210   *nz = nztot;
1211   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1212   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
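
/*
   Illustrative sketch (not part of the library) of the standard calling
   sequence for the routine above: only locally owned rows may be requested,
   and each MatGetRow() must be matched by MatRestoreRow() before the next
   row is fetched.

       int    ierr,row,rstart,rend,nz,*cols;
       Scalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ... use nz, cols[], vals[] ...
         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
*/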
1215 
1216 #undef __FUNC__
1217 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIAIJ"
1218 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1219 {
1220   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1221 
1222   PetscFunctionBegin;
1223   if (aij->getrowactive == PETSC_FALSE) {
1224     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1225   }
1226   aij->getrowactive = PETSC_FALSE;
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNC__
1231 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIAIJ"
1232 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1233 {
1234   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1235   Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1236   int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1237   PetscReal  sum = 0.0;
1238   Scalar     *v;
1239 
1240   PetscFunctionBegin;
1241   if (aij->size == 1) {
1242     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1243   } else {
1244     if (type == NORM_FROBENIUS) {
1245       v = amat->a;
1246       for (i=0; i<amat->nz; i++) {
1247 #if defined(PETSC_USE_COMPLEX)
1248         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1249 #else
1250         sum += (*v)*(*v); v++;
1251 #endif
1252       }
1253       v = bmat->a;
1254       for (i=0; i<bmat->nz; i++) {
1255 #if defined(PETSC_USE_COMPLEX)
1256         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1257 #else
1258         sum += (*v)*(*v); v++;
1259 #endif
1260       }
1261       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1262       *norm = sqrt(*norm);
1263     } else if (type == NORM_1) { /* max column norm */
1264       PetscReal *tmp,*tmp2;
1265       int    *jj,*garray = aij->garray;
1266       tmp  = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp);
1267       tmp2 = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2);
1268       ierr = PetscMemzero(tmp,aij->N*sizeof(PetscReal));CHKERRQ(ierr);
1269       *norm = 0.0;
1270       v = amat->a; jj = amat->j;
1271       for (j=0; j<amat->nz; j++) {
1272         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1273       }
1274       v = bmat->a; jj = bmat->j;
1275       for (j=0; j<bmat->nz; j++) {
1276         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1277       }
1278       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1279       for (j=0; j<aij->N; j++) {
1280         if (tmp2[j] > *norm) *norm = tmp2[j];
1281       }
1282       ierr = PetscFree(tmp);CHKERRQ(ierr);
1283       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1284     } else if (type == NORM_INFINITY) { /* max row norm */
1285       PetscReal ntemp = 0.0;
1286       for (j=0; j<amat->m; j++) {
1287         v = amat->a + amat->i[j] + shift;
1288         sum = 0.0;
1289         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1290           sum += PetscAbsScalar(*v); v++;
1291         }
1292         v = bmat->a + bmat->i[j] + shift;
1293         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1294           sum += PetscAbsScalar(*v); v++;
1295         }
1296         if (sum > ntemp) ntemp = sum;
1297       }
1298       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1299     } else {
1300       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1301     }
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNC__
1307 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIAIJ"
1308 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1309 {
1310   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1311   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1312   int        ierr,shift = Aloc->indexshift;
1313   int        M = a->M,N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1314   Mat        B;
1315   Scalar     *array;
1316 
1317   PetscFunctionBegin;
1318   if (!matout && M != N) {
1319     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1320   }
1321 
1322   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1323 
1324   /* copy over the A part */
1325   Aloc = (Mat_SeqAIJ*)a->A->data;
1326   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1327   row = a->rstart;
1328   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1329   for (i=0; i<m; i++) {
1330     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1331     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1332   }
1333   aj = Aloc->j;
1334   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1335 
1336   /* copy over the B part */
1337   Aloc = (Mat_SeqAIJ*)a->B->data;
1338   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1339   row = a->rstart;
1340   ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols);
1341   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1342   for (i=0; i<m; i++) {
1343     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1344     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1345   }
1346   ierr = PetscFree(ct);CHKERRQ(ierr);
1347   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1348   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1349   if (matout) {
1350     *matout = B;
1351   } else {
1352     PetscOps       *Abops;
1353     struct _MatOps *Aops;
1354 
1355     /* This isn't really an in-place transpose .... but free data structures from a */
1356     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1357     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1358     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1359 #if defined (PETSC_USE_CTABLE)
1360     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1361 #else
1362     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1363 #endif
1364     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1365     if (a->lvec)   {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
1366     if (a->Mvctx)  {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
1367     ierr = PetscFree(a);CHKERRQ(ierr);
1368 
1369     /*
1370        This is horrible, horrible code. We need to keep the
1371       A pointers for the bops and ops but copy everything
1372       else from B.
1373     */
1374     Abops   = A->bops;
1375     Aops    = A->ops;
1376     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1377     A->bops = Abops;
1378     A->ops  = Aops;
1379     PetscHeaderDestroy(B);
1380   }
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNC__
1385 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIAIJ"
1386 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1387 {
1388   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1389   Mat        a = aij->A,b = aij->B;
1390   int        ierr,s1,s2,s3;
1391 
1392   PetscFunctionBegin;
1393   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1394   if (rr) {
1395     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1396     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1397     /* Overlap communication with computation. */
1398     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1399   }
1400   if (ll) {
1401     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1402     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1403     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1404   }
1405   /* scale the diagonal block */
1406   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1407 
1408   if (rr) {
1409     /* Do a scatter end and then right scale the off-diagonal block */
1410     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1411     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1412   }
1413 
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 
1418 #undef __FUNC__
1419 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIAIJ"
1420 int MatPrintHelp_MPIAIJ(Mat A)
1421 {
1422   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1423   int        ierr;
1424 
1425   PetscFunctionBegin;
1426   if (!a->rank) {
1427     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNC__
1433 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIAIJ"
1434 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1435 {
1436   PetscFunctionBegin;
1437   *bs = 1;
1438   PetscFunctionReturn(0);
1439 }
1440 #undef __FUNC__
1441 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIAIJ"
1442 int MatSetUnfactored_MPIAIJ(Mat A)
1443 {
1444   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1445   int        ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNC__
1453 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIAIJ"
1454 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1455 {
1456   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1457   Mat        a,b,c,d;
1458   PetscTruth flg;
1459   int        ierr;
1460 
1461   PetscFunctionBegin;
1462   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1463   a = matA->A; b = matA->B;
1464   c = matB->A; d = matB->B;
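  /* compare the diagonal and off-diagonal blocks locally, then combine the local results across all processes */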
1465 
1466   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1467   if (flg == PETSC_TRUE) {
1468     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1469   }
1470   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNC__
1475 #define __FUNC__ /*<a name=""></a>*/"MatCopy_MPIAIJ"
1476 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1477 {
1478   int        ierr;
1479   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1480   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1481 
1482   PetscFunctionBegin;
1483   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1484     /* because of the column compression in the off-processor part of the matrix a->B,
1485        the number of columns in a->B and b->B may be different, hence we cannot call
1486        MatCopy() directly on the two parts. If need be, we could provide a more
1487        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1488        and then copying the submatrices */
1489     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1490   } else {
1491     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1492     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1493   }
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1498 extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1499 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1500 extern int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1501 extern int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1502 
1503 /* -------------------------------------------------------------------*/
1504 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1505        MatGetRow_MPIAIJ,
1506        MatRestoreRow_MPIAIJ,
1507        MatMult_MPIAIJ,
1508        MatMultAdd_MPIAIJ,
1509        MatMultTranspose_MPIAIJ,
1510        MatMultTransposeAdd_MPIAIJ,
1511        0,
1512        0,
1513        0,
1514        0,
1515        0,
1516        0,
1517        MatRelax_MPIAIJ,
1518        MatTranspose_MPIAIJ,
1519        MatGetInfo_MPIAIJ,
1520        MatEqual_MPIAIJ,
1521        MatGetDiagonal_MPIAIJ,
1522        MatDiagonalScale_MPIAIJ,
1523        MatNorm_MPIAIJ,
1524        MatAssemblyBegin_MPIAIJ,
1525        MatAssemblyEnd_MPIAIJ,
1526        0,
1527        MatSetOption_MPIAIJ,
1528        MatZeroEntries_MPIAIJ,
1529        MatZeroRows_MPIAIJ,
1530        0,
1531        0,
1532        0,
1533        0,
1534        MatGetSize_MPIAIJ,
1535        MatGetLocalSize_MPIAIJ,
1536        MatGetOwnershipRange_MPIAIJ,
1537        0,
1538        0,
1539        0,
1540        0,
1541        MatDuplicate_MPIAIJ,
1542        0,
1543        0,
1544        0,
1545        0,
1546        0,
1547        MatGetSubMatrices_MPIAIJ,
1548        MatIncreaseOverlap_MPIAIJ,
1549        MatGetValues_MPIAIJ,
1550        MatCopy_MPIAIJ,
1551        MatPrintHelp_MPIAIJ,
1552        MatScale_MPIAIJ,
1553        0,
1554        0,
1555        0,
1556        MatGetBlockSize_MPIAIJ,
1557        0,
1558        0,
1559        0,
1560        0,
1561        MatFDColoringCreate_MPIAIJ,
1562        0,
1563        MatSetUnfactored_MPIAIJ,
1564        0,
1565        0,
1566        MatGetSubMatrix_MPIAIJ,
1567        0,
1568        0,
1569        MatGetMaps_Petsc};
1570 
1571 /* ----------------------------------------------------------------------------------------*/
1572 
1573 EXTERN_C_BEGIN
1574 #undef __FUNC__
1575 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIAIJ"
1576 int MatStoreValues_MPIAIJ(Mat mat)
1577 {
1578   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1579   int        ierr;
1580 
1581   PetscFunctionBegin;
1582   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1583   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 EXTERN_C_END
1587 
1588 EXTERN_C_BEGIN
1589 #undef __FUNC__
1590 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIAIJ"
1591 int MatRetrieveValues_MPIAIJ(Mat mat)
1592 {
1593   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1594   int        ierr;
1595 
1596   PetscFunctionBegin;
1597   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1598   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 EXTERN_C_END
1602 
1603 #include "pc.h"
1604 EXTERN_C_BEGIN
1605 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1606 EXTERN_C_END
1607 
1608 #undef __FUNC__
1609 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIAIJ"
1610 /*@C
1611    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1612    (the default parallel PETSc format).  For good matrix assembly performance
1613    the user should preallocate the matrix storage by setting the parameters
1614    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1615    performance can be increased by more than a factor of 50.
1616 
1617    Collective on MPI_Comm
1618 
1619    Input Parameters:
1620 +  comm - MPI communicator
1621 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1622            This value should be the same as the local size used in creating the
1623            y vector for the matrix-vector product y = Ax.
1624 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1625        This value should be the same as the local size used in creating the
1626        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1627 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1628 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1629 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1630            (same value is used for all local rows)
1631 .  d_nnz - array containing the number of nonzeros in the various rows of the
1632            DIAGONAL portion of the local submatrix (possibly different for each row)
1633            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1634            The size of this array is equal to the number of local rows, i.e., 'm'.
1635            You must leave room for the diagonal entry even if it is zero.
1636 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1637            submatrix (same value is used for all local rows).
1638 -  o_nnz - array containing the number of nonzeros in the various rows of the
1639            OFF-DIAGONAL portion of the local submatrix (possibly different for
1640            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1641            structure. The size of this array is equal to the number
1642            of local rows, i.e., 'm'.
1643 
1644    Output Parameter:
1645 .  A - the matrix
1646 
1647    Notes:
1648    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1649    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1650    storage requirements for this matrix.
1651 
1652    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1653    processor then it must be used on all processors that share the object for
1654    that argument.
1655 
1656    The AIJ format (also called the Yale sparse matrix format or
1657    compressed row storage), is fully compatible with standard Fortran 77
1658    storage.  That is, the stored row and column indices can begin at
1659    either one (as in Fortran) or zero.  See the users manual for details.
1660 
1661    The user MUST specify either the local or global matrix dimensions
1662    (possibly both).
1663 
1664    The parallel matrix is partitioned such that the first m0 rows belong to
1665    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1666    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
1667 
1668    The DIAGONAL portion of the local submatrix of a processor can be defined
1669    as the submatrix which is obtained by extracting the part corresponding
1670    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1671    first row that belongs to the processor, and r2 is the last row belonging
1672    to this processor. This is a square mxm matrix. The remaining portion
1673    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1674 
1675    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1676 
1677    By default, this format uses inodes (identical nodes) when possible.
1678    We search for consecutive rows with the same nonzero structure, thereby
1679    reusing matrix information to achieve increased efficiency.
1680 
1681    Options Database Keys:
1682 +  -mat_aij_no_inode  - Do not use inodes
1683 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1684 .  -mat_aij_oneindex - Internally use indexing starting at 1
1685         rather than 0.  Note that when calling MatSetValues(),
1686         the user still MUST index entries starting at 0!
1687 -   -mat_mpi - use the parallel matrix data structures even on one processor
1688                (defaults to using SeqAIJ format on one processor)
1691 
1692 
1693    Example usage:
1694 
1695    Consider the following 8x8 matrix with 34 non-zero values, that is
1696    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1697    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1698    as follows:
1699 
1700 .vb
1701             1  2  0  |  0  3  0  |  0  4
1702     Proc0   0  5  6  |  7  0  0  |  8  0
1703             9  0 10  | 11  0  0  | 12  0
1704     -------------------------------------
1705            13  0 14  | 15 16 17  |  0  0
1706     Proc1   0 18  0  | 19 20 21  |  0  0
1707             0  0  0  | 22 23  0  | 24  0
1708     -------------------------------------
1709     Proc2  25 26 27  |  0  0 28  | 29  0
1710            30  0  0  | 31 32 33  |  0 34
1711 .ve
1712 
1713    This can be represented as a collection of submatrices as:
1714 
1715 .vb
1716       A B C
1717       D E F
1718       G H I
1719 .ve
1720 
1721    Where the submatrices A,B,C are owned by proc0, D,E,F are
1722    owned by proc1, G,H,I are owned by proc2.
1723 
1724    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1725    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1726    The 'M','N' parameters are 8,8, and have the same values on all procs.
1727 
1728    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1729    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1730    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1731    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1732    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1733    matrix, and [DF] as another SeqAIJ matrix.
1734 
1735    When d_nz, o_nz parameters are specified, d_nz storage elements are
1736    allocated for every row of the local diagonal submatrix, and o_nz
1737    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1738    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
1739    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
1740    In this case, the values of d_nz,o_nz are:
1741 .vb
1742      proc0 : d_nz = 2, o_nz = 2
1743      proc1 : d_nz = 3, o_nz = 2
1744      proc2 : d_nz = 1, o_nz = 4
1745 .ve
1746    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1747    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1748    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1749    34 values.
1750 
1751    When d_nnz, o_nnz parameters are specified, the storage is specified
1752    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1753    In the above case the values for d_nnz,o_nnz are:
1754 .vb
1755      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1756      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1757      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1758 .ve
1759    Here the space allocated is the sum of all the above values, i.e., 34, and
1760    hence the preallocation is exact.
1761 
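   As a sketch (not verbatim library code), the call on proc0 for this example
   could look as follows; comm is assumed to be the communicator containing the
   three processes, and the calls on proc1 and proc2 differ only in the local
   sizes and the array values listed above:
.vb
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     Mat A;
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
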
1762    Level: intermediate
1763 
1764 .keywords: matrix, aij, compressed row, sparse, parallel
1765 
1766 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1767 @*/
1768 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1769 {
1770   Mat          B;
1771   Mat_MPIAIJ   *b;
1772   int          ierr,i,size;
1773   PetscTruth   flag1,flag2;
1774 
1775   PetscFunctionBegin;
1776 
1777   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1778   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1779   if (d_nnz) {
1780     for (i=0; i<m; i++) {
1781       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1782     }
1783   }
1784   if (o_nnz) {
1785     for (i=0; i<m; i++) {
1786       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1787     }
1788   }
1789 
1790   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1791   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1792   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1793   if (!flag1 && !flag2 && size == 1) {
1794     if (M == PETSC_DECIDE) M = m;
1795     if (N == PETSC_DECIDE) N = n;
1796     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1797     PetscFunctionReturn(0);
1798   }
1799 
1800   *A = 0;
1801   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1802   PLogObjectCreate(B);
1803   B->data         = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1804   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1805   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1806   B->ops->destroy = MatDestroy_MPIAIJ;
1807   B->ops->view    = MatView_MPIAIJ;
1808   B->factor       = 0;
1809   B->assembled    = PETSC_FALSE;
1810   B->mapping      = 0;
1811 
1812   B->insertmode      = NOT_SET_VALUES;
1813   b->size            = size;
1814   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1815 
1816   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
1817     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1818   }
1819 
1820   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1821   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1822   b->m = m; B->m = m;
1823   b->n = n; B->n = n;
1824   b->N = N; B->N = N;
1825   b->M = M; B->M = M;
1826 
1827   /* the information in the maps duplicates the information computed below, eventually
1828      we should remove the duplicate information that is not contained in the maps */
1829   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1830   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1831 
1832   /* build local table of row and column ownerships */
1833   b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1834   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1835   b->cowners = b->rowners + b->size + 2;
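  /* gather each process's local row count and convert the counts to a running sum,
     so rowners[rank] and rowners[rank+1] bound the rows owned by this process */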
1836   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1837   b->rowners[0] = 0;
1838   for (i=2; i<=b->size; i++) {
1839     b->rowners[i] += b->rowners[i-1];
1840   }
1841   b->rstart = b->rowners[b->rank];
1842   b->rend   = b->rowners[b->rank+1];
1843   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1844   b->cowners[0] = 0;
1845   for (i=2; i<=b->size; i++) {
1846     b->cowners[i] += b->cowners[i-1];
1847   }
1848   b->cstart = b->cowners[b->rank];
1849   b->cend   = b->cowners[b->rank+1];
1850 
1851   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1852   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1853   PLogObjectParent(B,b->A);
1854   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1855   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1856   PLogObjectParent(B,b->B);
1857 
1858   /* build cache (stash) for off-processor entries generated during assembly */
1859   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1860   b->donotstash  = PETSC_FALSE;
1861   b->colmap      = 0;
1862   b->garray      = 0;
1863   b->roworiented = PETSC_TRUE;
1864 
1865   /* stuff used for matrix vector multiply */
1866   b->lvec      = PETSC_NULL;
1867   b->Mvctx     = PETSC_NULL;
1868 
1869   /* stuff for MatGetRow() */
1870   b->rowindices   = 0;
1871   b->rowvalues    = 0;
1872   b->getrowactive = PETSC_FALSE;
1873 
1874   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1875                                      "MatStoreValues_MPIAIJ",
1876                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1877   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1878                                      "MatRetrieveValues_MPIAIJ",
1879                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1880   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1881                                      "MatGetDiagonalBlock_MPIAIJ",
1882                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1883   *A = B;
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #undef __FUNC__
1888 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ"
1889 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1890 {
1891   Mat        mat;
1892   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1893   int        ierr,len = 0;
1894   PetscTruth flg;
1895 
1896   PetscFunctionBegin;
1897   *newmat       = 0;
1898   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1899   PLogObjectCreate(mat);
1900   mat->data         = (void*)(a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1901   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1902   mat->ops->destroy = MatDestroy_MPIAIJ;
1903   mat->ops->view    = MatView_MPIAIJ;
1904   mat->factor       = matin->factor;
1905   mat->assembled    = PETSC_TRUE;
1906   mat->insertmode   = NOT_SET_VALUES;
1907 
1908   a->m = mat->m   = oldmat->m;
1909   a->n = mat->n   = oldmat->n;
1910   a->M = mat->M   = oldmat->M;
1911   a->N = mat->N   = oldmat->N;
1912 
1913   a->rstart       = oldmat->rstart;
1914   a->rend         = oldmat->rend;
1915   a->cstart       = oldmat->cstart;
1916   a->cend         = oldmat->cend;
1917   a->size         = oldmat->size;
1918   a->rank         = oldmat->rank;
1919   a->donotstash   = oldmat->donotstash;
1920   a->roworiented  = oldmat->roworiented;
1921   a->rowindices   = 0;
1922   a->rowvalues    = 0;
1923   a->getrowactive = PETSC_FALSE;
1924 
1925   a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1926   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1927   a->cowners = a->rowners + a->size + 2;
1928   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1929   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1930   if (oldmat->colmap) {
1931 #if defined (PETSC_USE_CTABLE)
1932     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1933 #else
1934     a->colmap = (int*)PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1935     PLogObjectMemory(mat,(a->N)*sizeof(int));
1936     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1937 #endif
1938   } else a->colmap = 0;
1939   if (oldmat->garray) {
1940     len = ((Mat_SeqAIJ*)(oldmat->B->data))->n;
1941     a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1942     PLogObjectMemory(mat,len*sizeof(int));
1943     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1944   } else a->garray = 0;
1945 
1946   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1947   PLogObjectParent(mat,a->lvec);
1948   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1949   PLogObjectParent(mat,a->Mvctx);
1950   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1951   PLogObjectParent(mat,a->A);
1952   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1953   PLogObjectParent(mat,a->B);
1954   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1955   if (flg) {
1956     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1957   }
1958   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1959   *newmat = mat;
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #include "sys.h"
1964 
1965 #undef __FUNC__
1966 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIAIJ"
1967 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1968 {
1969   Mat          A;
1970   Scalar       *vals,*svals;
1971   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1972   MPI_Status   status;
1973   int          i,nz,ierr,j,rstart,rend,fd;
1974   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1975   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1976   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1977 
1978   PetscFunctionBegin;
1979   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1980   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1981   if (!rank) {
1982     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1983     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1984     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1985     if (header[3] < 0) {
1986       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1987     }
1988   }
1989 
1990   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1991   M = header[1]; N = header[2];
1992   /* determine ownership of all rows */
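  /* each process gets M/size rows; the first M%size processes each get one extra row */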
1993   m = M/size + ((M % size) > rank);
1994   rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1995   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1996   rowners[0] = 0;
1997   for (i=2; i<=size; i++) {
1998     rowners[i] += rowners[i-1];
1999   }
2000   rstart = rowners[rank];
2001   rend   = rowners[rank+1];
2002 
2003   /* distribute row lengths to all processors */
2004   ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
2005   offlens = ourlens + (rend-rstart);
2006   if (!rank) {
2007     rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
2008     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2009     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2010     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2011     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2012     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2013   } else {
2014     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2015   }
2016 
2017   if (!rank) {
2018     /* calculate the number of nonzeros on each processor */
2019     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2020     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2021     for (i=0; i<size; i++) {
2022       for (j=rowners[i]; j< rowners[i+1]; j++) {
2023         procsnz[i] += rowlengths[j];
2024       }
2025     }
2026     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2027 
2028     /* determine max buffer needed and allocate it */
2029     maxnz = 0;
2030     for (i=0; i<size; i++) {
2031       maxnz = PetscMax(maxnz,procsnz[i]);
2032     }
2033     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2034 
2035     /* read in my part of the matrix column indices  */
2036     nz     = procsnz[0];
2037     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
2038     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2039 
2040     /* read in everyone else's part and ship it off */
2041     for (i=1; i<size; i++) {
2042       nz   = procsnz[i];
2043       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2044       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2045     }
2046     ierr = PetscFree(cols);CHKERRQ(ierr);
2047   } else {
2048     /* determine buffer space needed for message */
2049     nz = 0;
2050     for (i=0; i<m; i++) {
2051       nz += ourlens[i];
2052     }
2053     mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
2054 
2055     /* receive message of column indices */
2056     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2057     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2058     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2059   }
2060 
2061   /* determine column ownership if matrix is not square */
2062   if (N != M) {
2063     n      = N/size + ((N % size) > rank);
2064     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2065     cstart = cend - n;
2066   } else {
2067     cstart = rstart;
2068     cend   = rend;
2069     n      = cend - cstart;
2070   }
2071 
2072   /* loop over local rows, determining number of off diagonal entries */
2073   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2074   jj = 0;
2075   for (i=0; i<m; i++) {
2076     for (j=0; j<ourlens[i]; j++) {
2077       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2078       jj++;
2079     }
2080   }
2081 
2082   /* create our matrix */
2083   for (i=0; i<m; i++) {
2084     ourlens[i] -= offlens[i];
2085   }
2086   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2087   A = *newmat;
2088   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2089   for (i=0; i<m; i++) {
2090     ourlens[i] += offlens[i];
2091   }
2092 
2093   if (!rank) {
2094     vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);
2095 
2096     /* read in my part of the matrix numerical values  */
2097     nz   = procsnz[0];
2098     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2099 
2100     /* insert into matrix */
2101     jj      = rstart;
2102     smycols = mycols;
2103     svals   = vals;
2104     for (i=0; i<m; i++) {
2105       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2106       smycols += ourlens[i];
2107       svals   += ourlens[i];
2108       jj++;
2109     }
2110 
2111     /* read in other processors and ship out */
2112     for (i=1; i<size; i++) {
2113       nz   = procsnz[i];
2114       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2115       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2116     }
2117     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2118   } else {
2119     /* receive numeric values */
2120     vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);
2121 
2122     /* receive message of values */
2123     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2124     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2125     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2126 
2127     /* insert into matrix */
2128     jj      = rstart;
2129     smycols = mycols;
2130     svals   = vals;
2131     for (i=0; i<m; i++) {
2132       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2133       smycols += ourlens[i];
2134       svals   += ourlens[i];
2135       jj++;
2136     }
2137   }
2138   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2139   ierr = PetscFree(vals);CHKERRQ(ierr);
2140   ierr = PetscFree(mycols);CHKERRQ(ierr);
2141   ierr = PetscFree(rowners);CHKERRQ(ierr);
2142 
2143   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2144   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 #undef __FUNC__
2149 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIAIJ"
2150 /*
2151     Not great since it makes two copies of the submatrix: first a SeqAIJ
2152   copy on each process, and then the final result formed by concatenating the
2153   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2154 */
2155 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2156 {
2157   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2158   Mat        *local,M,Mreuse;
2159   Scalar     *vwork,*aa;
2160   MPI_Comm   comm = mat->comm;
2161   Mat_SeqAIJ *aij;
2162   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
2163 
2164   PetscFunctionBegin;
2165   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2166   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2167 
2168   if (call ==  MAT_REUSE_MATRIX) {
2169     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2170     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2171     local = &Mreuse;
2172     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2173   } else {
2174     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2175     Mreuse = *local;
2176     ierr = PetscFree(local);CHKERRQ(ierr);
2177   }
2178 
2179   /*
2180       m - number of local rows
2181       n - number of columns (same on all processors)
2182       rstart - first row in new global matrix generated
2183   */
2184   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2185   if (call == MAT_INITIAL_MATRIX) {
2186     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2187     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2188     ii  = aij->i;
2189     jj  = aij->j;
2190 
2191     /*
2192         Determine the number of non-zeros in the diagonal and off-diagonal
2193         portions of the matrix in order to do correct preallocation
2194     */
2195 
2196     /* first get start and end of "diagonal" columns */
2197     if (csize == PETSC_DECIDE) {
2198       nlocal = n/size + ((n % size) > rank);
2199     } else {
2200       nlocal = csize;
2201     }
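    /* the inclusive prefix sum of the local column sizes gives the end of this process's 'diagonal' column range */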
2202     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2203     rstart = rend - nlocal;
2204     if (rank == size - 1 && rend != n) {
2205       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2206     }
2207 
2208     /* next, compute all the lengths */
2209     dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens);
2210     olens = dlens + m;
2211     for (i=0; i<m; i++) {
2212       jend = ii[i+1] - ii[i];
2213       olen = 0;
2214       dlen = 0;
2215       for (j=0; j<jend; j++) {
2216         if (*jj < rstart || *jj >= rend) olen++;
2217         else dlen++;
2218         jj++;
2219       }
2220       olens[i] = olen;
2221       dlens[i] = dlen;
2222     }
2223     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2224     ierr = PetscFree(dlens);CHKERRQ(ierr);
2225   } else {
2226     int ml,nl;
2227 
2228     M = *newmat;
2229     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2230     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2231     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2232     /*
2233          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2234        rather than the slower MatSetValues().
2235     */
2236     M->was_assembled = PETSC_TRUE;
2237     M->assembled     = PETSC_FALSE;
2238   }
2239   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2240   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2241   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2242   ii  = aij->i;
2243   jj  = aij->j;
2244   aa  = aij->a;
2245   for (i=0; i<m; i++) {
2246     row   = rstart + i;
2247     nz    = ii[i+1] - ii[i];
2248     cwork = jj;     jj += nz;
2249     vwork = aa;     aa += nz;
2250     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2251   }
2252 
2253   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2254   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2255   *newmat = M;
2256 
2257   /* save submatrix used in processor for next request */
2258   if (call ==  MAT_INITIAL_MATRIX) {
2259     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2260     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2261   }
2262 
2263   PetscFunctionReturn(0);
2264 }
2265