xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order N integer array, but access is fast).
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   int        n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
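/*
   A sketch of how the colmap built above is consulted elsewhere in this file,
   e.g. in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; gcol and lcol
   are hypothetical names for a global column index and its local off-diagonal
   counterpart.  Entries are stored shifted by one, so a value of -1 after the
   decrement means the column is not present in the off-diagonal block:

#if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
#else
       lcol = aij->colmap[gcol] - 1;
#endif
*/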
34 
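/*
   The MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros
   below are hand-inlined versions of MatSetValues_SeqAIJ() for the diagonal (A)
   and off-diagonal (B) blocks.  Each locates the column within the row by a short
   binary search followed by a linear scan, then either updates the existing entry
   (ADD_VALUES or INSERT_VALUES) or shifts later entries up to insert a new one;
   when the preallocated row is full, the entire matrix storage is reallocated,
   enlarged by CHUNKSIZE entries.
*/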
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
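/*
   MatSetValues_MPIAIJ(): a value whose row is owned by this process goes either
   into the diagonal block A (global column in [cstart,cend)) or into the
   off-diagonal block B (global column translated through colmap, disassembling
   the matrix if a new off-diagonal column appears after assembly); values for
   rows owned by other processes are stashed and exchanged during
   MatAssemblyBegin/End.
*/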
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   PetscErrorCode ierr;
191   int          i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some Variables required in the macro */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   int        i,j,rstart = aij->rstart,rend = aij->rend;
278   int        cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   int         nstash,reallocs;
322   InsertMode  addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
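/*
   MatAssemblyEnd_MPIAIJ(): drain the stash of off-process values (each run of
   entries sharing a row is inserted with a single MatSetValues_MPIAIJ() call),
   assemble the local A and B blocks, disassemble if any other process has
   disassembled, and on the first final assembly build the scatter used by the
   matrix-vector products (MatSetUpMultiply_MPIAIJ()).
*/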
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   int         i,j,rstart,ncols,n,flg;
351   int         *row,*col,other_disassembled;
352   PetscScalar *val;
353   InsertMode  addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled, if so we must
378      also disassemble ourselves, in order that we may reassemble. */
379   /*
380      if nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled thus we can skip this stuff
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387     }
388   }
389 
390   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
391     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
392   }
393   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
395 
396   if (aij->rowvalues) {
397     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
398     aij->rowvalues = 0;
399   }
400 
401   /* used by MatAXPY() */
402   a->xtoy = 0; b->xtoy = 0;
403   a->XtoY = 0; b->XtoY = 0;
404 
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   PetscErrorCode ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
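/*
   MatZeroRows_MPIAIJ(): each process determines which process owns each of the
   row indices it was handed, ships those indices to their owners with
   nonblocking MPI sends/receives, collects the rows it owns itself, and then
   zeros them locally in the A and B blocks (optionally placing diag on the
   diagonal).
*/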
421 #undef __FUNCT__
422 #define __FUNCT__ "MatZeroRows_MPIAIJ"
423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   PetscErrorCode ierr;
427   int            i,N,*rows,*owners = l->rowners,size = l->size;
428   int            *nprocs,j,idx,nsends,row;
429   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
430   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
431   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
432   MPI_Comm       comm = A->comm;
433   MPI_Request    *send_waits,*recv_waits;
434   MPI_Status     recv_status,*send_status;
435   IS             istmp;
436   PetscTruth     found;
437 
438   PetscFunctionBegin;
439   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
440   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
441 
442   /*  first count number of contributors to each processor */
443   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
444   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
445   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
446   for (i=0; i<N; i++) {
447     idx = rows[i];
448     found = PETSC_FALSE;
449     for (j=0; j<size; j++) {
450       if (idx >= owners[j] && idx < owners[j+1]) {
451         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
452       }
453     }
454     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
455   }
456   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
457 
458   /* inform other processors of number of messages and max length*/
459   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
463   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
464   for (i=0; i<nrecvs; i++) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
474   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
475   starts[0] = 0;
476   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
477   for (i=0; i<N; i++) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
481 
482   starts[0] = 0;
483   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
484   count = 0;
485   for (i=0; i<size; i++) {
486     if (nprocs[2*i+1]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]    = n;
504     slen          += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
511   count = 0;
512   for (i=0; i<nrecvs; i++) {
513     values = rvalues + i*nmax;
514     for (j=0; j<lens[i]; j++) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PetscLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal we use separate
530      code so that MatSetValues() is not called for each diagonal entry, which
531      would allocate new memory, triggering many mallocs and slowing things down.
532 
533        Contributed by: Matthew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for (i = 0; i < slen; i++) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
560     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
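/*
   MatMult_MPIAIJ(): yy = A*xx + B*lvec, where A acts on the locally owned part
   of xx and lvec holds the needed off-process (ghost) entries of xx; the scatter
   that fills lvec is overlapped with the multiply by the diagonal block A.
*/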
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMult_MPIAIJ"
571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
574   PetscErrorCode ierr;
575   int        nt;
576 
577   PetscFunctionBegin;
578   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
579   if (nt != A->n) {
580     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
581   }
582   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
584   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMultAdd_MPIAIJ"
591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   PetscErrorCode ierr;
595 
596   PetscFunctionBegin;
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   PetscErrorCode ierr;
610 
611   PetscFunctionBegin;
612   /* do nondiagonal part */
613   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
614   /* send it on its way */
615   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   /* do local part */
617   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
618   /* receive remote parts: note this assumes the values are not actually */
619   /* inserted in yy until the next line, which is true for this implementation */
620   /* but may not always be true. */
621   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
629 {
630   MPI_Comm comm;
631   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
632   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
633   IS         Me,Notme;
634   PetscErrorCode ierr;
635   int        M,N,first,last,*notme,ntids,i;
636 
637   PetscFunctionBegin;
638 
639   /* Easy test: symmetric diagonal block */
640   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
641   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
642   if (!*f) PetscFunctionReturn(0);
643   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
644   ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
645   if (ntids==1) PetscFunctionReturn(0);
646 
647   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
648   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
649   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
650   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
651   for (i=0; i<first; i++) notme[i] = i;
652   for (i=last; i<M; i++) notme[i-last+first] = i;
653   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
654   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
655   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
656   Aoff = Aoffs[0];
657   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
658   Boff = Boffs[0];
659   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
660   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
661   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
662   ierr = ISDestroy(Me);CHKERRQ(ierr);
663   ierr = ISDestroy(Notme);CHKERRQ(ierr);
664 
665   PetscFunctionReturn(0);
666 }
667 EXTERN_C_END
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
672 {
673   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
674   PetscErrorCode ierr;
675 
676   PetscFunctionBegin;
677   /* do nondiagonal part */
678   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
679   /* send it on its way */
680   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   /* do local part */
682   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
683   /* receive remote parts: note this assumes the values are not actually */
684   /* inserted in zz until the next line, which is true for this implementation */
685   /* but may not always be true. */
686   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 /*
691   This only works correctly for square matrices where the subblock A->A is the
692    diagonal block
693 */
694 #undef __FUNCT__
695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
697 {
698   PetscErrorCode ierr;
699   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
700 
701   PetscFunctionBegin;
702   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
703   if (a->rstart != a->cstart || a->rend != a->cend) {
704     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
705   }
706   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatScale_MPIAIJ"
712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
713 {
714   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
719   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatDestroy_MPIAIJ"
725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731 #if defined(PETSC_USE_LOG)
732   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
733 #endif
734   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
735   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
736   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
737   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
738 #if defined (PETSC_USE_CTABLE)
739   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
740 #else
741   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
742 #endif
743   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
744   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
745   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
746   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
747   ierr = PetscFree(aij);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
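/*
   MatView_MPIAIJ_Binary(): process 0 writes the header, then the row lengths,
   global column indices and values of its own rows; every other process sends
   its rows to process 0, which forwards them to the file in order.
*/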
751 #undef __FUNCT__
752 #define __FUNCT__ "MatView_MPIAIJ_Binary"
753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
754 {
755   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
756   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
757   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
758   PetscErrorCode ierr;
759   int               nz,fd,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
760   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
761   PetscScalar       *column_values;
762 
763   PetscFunctionBegin;
764   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
765   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
766   nz   = A->nz + B->nz;
767   if (!rank) {
768     header[0] = MAT_FILE_COOKIE;
769     header[1] = mat->M;
770     header[2] = mat->N;
771     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
773     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
774     /* get largest number of rows any processor has */
775     rlen = mat->m;
776     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
777     for (i=1; i<size; i++) {
778       rlen = PetscMax(rlen,range[i+1] - range[i]);
779     }
780   } else {
781     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
782     rlen = mat->m;
783   }
784 
785   /* load up the local row counts */
786   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
787   for (i=0; i<mat->m; i++) {
788     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
789   }
790 
791   /* store the row lengths to the file */
792   if (!rank) {
793     MPI_Status status;
794     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       rlen = range[i+1] - range[i];
797       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
798       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
799     }
800   } else {
801     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
802   }
803   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
804 
805   /* load up the local column indices */
806   nzmax = nz; /* process 0 needs as much space as the largest processor needs */
807   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
808   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
809   cnt  = 0;
810   for (i=0; i<mat->m; i++) {
811     for (j=B->i[i]; j<B->i[i+1]; j++) {
812       if ( (col = garray[B->j[j]]) > cstart) break;
813       column_indices[cnt++] = col;
814     }
815     for (k=A->i[i]; k<A->i[i+1]; k++) {
816       column_indices[cnt++] = A->j[k] + cstart;
817     }
818     for (; j<B->i[i+1]; j++) {
819       column_indices[cnt++] = garray[B->j[j]];
820     }
821   }
822   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
823 
824   /* store the column indices to the file */
825   if (!rank) {
826     MPI_Status status;
827     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
828     for (i=1; i<size; i++) {
829       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
830       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
831       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
833     }
834   } else {
835     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
836     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
837   }
838   ierr = PetscFree(column_indices);CHKERRQ(ierr);
839 
840   /* load up the local column values */
841   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
842   cnt  = 0;
843   for (i=0; i<mat->m; i++) {
844     for (j=B->i[i]; j<B->i[i+1]; j++) {
845       if ( garray[B->j[j]] > cstart) break;
846       column_values[cnt++] = B->a[j];
847     }
848     for (k=A->i[i]; k<A->i[i+1]; k++) {
849       column_values[cnt++] = A->a[k];
850     }
851     for (; j<B->i[i+1]; j++) {
852       column_values[cnt++] = B->a[j];
853     }
854   }
855   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
856 
857   /* store the column values to the file */
858   if (!rank) {
859     MPI_Status status;
860     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
861     for (i=1; i<size; i++) {
862       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
863       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
864       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
865       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
866     }
867   } else {
868     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
869     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
870   }
871   ierr = PetscFree(column_values);CHKERRQ(ierr);
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
877 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
878 {
879   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
880   PetscErrorCode ierr;
881   int               rank = aij->rank,size = aij->size;
882   PetscTruth        isdraw,iascii,flg,isbinary;
883   PetscViewer       sviewer;
884   PetscViewerFormat format;
885 
886   PetscFunctionBegin;
887   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
888   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
889   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
890   if (iascii) {
891     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
892     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
893       MatInfo info;
894       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
895       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
896       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
897       if (flg) {
898         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
899 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
900       } else {
901         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
902 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
903       }
904       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
905       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
906       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
907       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
908       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
909       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
910       PetscFunctionReturn(0);
911     } else if (format == PETSC_VIEWER_ASCII_INFO) {
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ *Aloc;
938     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     if (!rank) {
942       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
943     } else {
944       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     PetscLogObjectParent(mat,A);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = aij->rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = aij->rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
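/*
   MatRelax_MPIAIJ(): only the SOR_LOCAL_* variants are supported.  For each
   outer iteration the current xx is scattered into lvec, the right-hand side is
   updated to bb1 = bb - B*lvec, and one sweep of the sequential relaxation is
   applied to the diagonal block A with bb1.
*/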
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1017 {
1018   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec          bb1;
1021   PetscScalar  mone=-1.0;
1022 
1023   PetscFunctionBegin;
1024   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1025 
1026   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1027 
1028   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1029     if (flag & SOR_ZERO_INITIAL_GUESS) {
1030       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1031       its--;
1032     }
1033 
1034     while (its--) {
1035       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1037 
1038       /* update rhs: bb1 = bb - B*x */
1039       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1040       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1041 
1042       /* local sweep */
1043       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1044       CHKERRQ(ierr);
1045     }
1046   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1047     if (flag & SOR_ZERO_INITIAL_GUESS) {
1048       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1049       its--;
1050     }
1051     while (its--) {
1052       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054 
1055       /* update rhs: bb1 = bb - B*x */
1056       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1057       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1058 
1059       /* local sweep */
1060       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1061       CHKERRQ(ierr);
1062     }
1063   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1064     if (flag & SOR_ZERO_INITIAL_GUESS) {
1065       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1066       its--;
1067     }
1068     while (its--) {
1069       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071 
1072       /* update rhs: bb1 = bb - B*x */
1073       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1074       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1075 
1076       /* local sweep */
1077       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1078       CHKERRQ(ierr);
1079     }
1080   } else {
1081     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1082   }
1083 
1084   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1090 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1091 {
1092   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1093   Mat        A = mat->A,B = mat->B;
1094   PetscErrorCode ierr;
1095   PetscReal  isend[5],irecv[5];
1096 
1097   PetscFunctionBegin;
1098   info->block_size     = 1.0;
1099   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1100   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1101   isend[3] = info->memory;  isend[4] = info->mallocs;
1102   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1103   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1104   isend[3] += info->memory;  isend[4] += info->mallocs;
1105   if (flag == MAT_LOCAL) {
1106     info->nz_used      = isend[0];
1107     info->nz_allocated = isend[1];
1108     info->nz_unneeded  = isend[2];
1109     info->memory       = isend[3];
1110     info->mallocs      = isend[4];
1111   } else if (flag == MAT_GLOBAL_MAX) {
1112     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1113     info->nz_used      = irecv[0];
1114     info->nz_allocated = irecv[1];
1115     info->nz_unneeded  = irecv[2];
1116     info->memory       = irecv[3];
1117     info->mallocs      = irecv[4];
1118   } else if (flag == MAT_GLOBAL_SUM) {
1119     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1120     info->nz_used      = irecv[0];
1121     info->nz_allocated = irecv[1];
1122     info->nz_unneeded  = irecv[2];
1123     info->memory       = irecv[3];
1124     info->mallocs      = irecv[4];
1125   }
1126   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1127   info->fill_ratio_needed = 0;
1128   info->factor_mallocs    = 0;
1129   info->rows_global       = (double)matin->M;
1130   info->columns_global    = (double)matin->N;
1131   info->rows_local        = (double)matin->m;
1132   info->columns_local     = (double)matin->N;
1133 
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatSetOption_MPIAIJ"
1139 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1140 {
1141   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   switch (op) {
1146   case MAT_NO_NEW_NONZERO_LOCATIONS:
1147   case MAT_YES_NEW_NONZERO_LOCATIONS:
1148   case MAT_COLUMNS_UNSORTED:
1149   case MAT_COLUMNS_SORTED:
1150   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1151   case MAT_KEEP_ZEROED_ROWS:
1152   case MAT_NEW_NONZERO_LOCATION_ERR:
1153   case MAT_USE_INODES:
1154   case MAT_DO_NOT_USE_INODES:
1155   case MAT_IGNORE_ZERO_ENTRIES:
1156     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1157     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1158     break;
1159   case MAT_ROW_ORIENTED:
1160     a->roworiented = PETSC_TRUE;
1161     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1162     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1163     break;
1164   case MAT_ROWS_SORTED:
1165   case MAT_ROWS_UNSORTED:
1166   case MAT_YES_NEW_DIAGONALS:
1167     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1168     break;
1169   case MAT_COLUMN_ORIENTED:
1170     a->roworiented = PETSC_FALSE;
1171     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1172     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1173     break;
1174   case MAT_IGNORE_OFF_PROC_ENTRIES:
1175     a->donotstash = PETSC_TRUE;
1176     break;
1177   case MAT_NO_NEW_DIAGONALS:
1178     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1179   case MAT_SYMMETRIC:
1180   case MAT_STRUCTURALLY_SYMMETRIC:
1181   case MAT_HERMITIAN:
1182   case MAT_SYMMETRY_ETERNAL:
1183     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1184     break;
1185   case MAT_NOT_SYMMETRIC:
1186   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1187   case MAT_NOT_HERMITIAN:
1188   case MAT_NOT_SYMMETRY_ETERNAL:
1189     break;
1190   default:
1191     SETERRQ(PETSC_ERR_SUP,"unknown option");
1192   }
1193   PetscFunctionReturn(0);
1194 }
1195 
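/*
   MatGetRow_MPIAIJ(): fetch the requested (locally owned) row from both the A
   and B blocks and merge them into a single list of global column indices and
   values in increasing column order: B columns below cstart first, then the A
   columns, then the remaining B columns.
*/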
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatGetRow_MPIAIJ"
1198 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1199 {
1200   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1201   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1202   PetscErrorCode ierr;
1203   int          i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1204   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1205   int          *cmap,*idx_p;
1206 
1207   PetscFunctionBegin;
1208   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1209   mat->getrowactive = PETSC_TRUE;
1210 
1211   if (!mat->rowvalues && (idx || v)) {
1212     /*
1213         allocate enough space to hold information from the longest row.
1214     */
1215     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1216     int     max = 1,tmp;
1217     for (i=0; i<matin->m; i++) {
1218       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1219       if (max < tmp) { max = tmp; }
1220     }
1221     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1222     mat->rowindices = (int*)(mat->rowvalues + max);
1223   }
1224 
1225   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1226   lrow = row - rstart;
1227 
1228   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1229   if (!v)   {pvA = 0; pvB = 0;}
1230   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1231   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1232   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1233   nztot = nzA + nzB;
1234 
1235   cmap  = mat->garray;
1236   if (v  || idx) {
1237     if (nztot) {
1238       /* Sort by increasing column numbers, assuming A and B already sorted */
1239       int imark = -1;
1240       if (v) {
1241         *v = v_p = mat->rowvalues;
1242         for (i=0; i<nzB; i++) {
1243           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1244           else break;
1245         }
1246         imark = i;
1247         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1248         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1249       }
1250       if (idx) {
1251         *idx = idx_p = mat->rowindices;
1252         if (imark > -1) {
1253           for (i=0; i<imark; i++) {
1254             idx_p[i] = cmap[cworkB[i]];
1255           }
1256         } else {
1257           for (i=0; i<nzB; i++) {
1258             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1259             else break;
1260           }
1261           imark = i;
1262         }
1263         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1264         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1265       }
1266     } else {
1267       if (idx) *idx = 0;
1268       if (v)   *v   = 0;
1269     }
1270   }
1271   *nz = nztot;
1272   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1273   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1279 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1280 {
1281   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1282 
1283   PetscFunctionBegin;
1284   if (aij->getrowactive == PETSC_FALSE) {
1285     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1286   }
1287   aij->getrowactive = PETSC_FALSE;
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatNorm_MPIAIJ"
1293 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1294 {
1295   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1296   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1297   PetscErrorCode ierr;
1298   int          i,j,cstart = aij->cstart;
1299   PetscReal    sum = 0.0;
1300   PetscScalar  *v;
1301 
1302   PetscFunctionBegin;
1303   if (aij->size == 1) {
1304     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1305   } else {
1306     if (type == NORM_FROBENIUS) {
1307       v = amat->a;
1308       for (i=0; i<amat->nz; i++) {
1309 #if defined(PETSC_USE_COMPLEX)
1310         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1311 #else
1312         sum += (*v)*(*v); v++;
1313 #endif
1314       }
1315       v = bmat->a;
1316       for (i=0; i<bmat->nz; i++) {
1317 #if defined(PETSC_USE_COMPLEX)
1318         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1319 #else
1320         sum += (*v)*(*v); v++;
1321 #endif
1322       }
1323       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1324       *norm = sqrt(*norm);
1325     } else if (type == NORM_1) { /* max column norm */
1326       PetscReal *tmp,*tmp2;
1327       int    *jj,*garray = aij->garray;
1328       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1329       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1330       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1331       *norm = 0.0;
1332       v = amat->a; jj = amat->j;
1333       for (j=0; j<amat->nz; j++) {
1334         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1335       }
1336       v = bmat->a; jj = bmat->j;
1337       for (j=0; j<bmat->nz; j++) {
1338         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1339       }
1340       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1341       for (j=0; j<mat->N; j++) {
1342         if (tmp2[j] > *norm) *norm = tmp2[j];
1343       }
1344       ierr = PetscFree(tmp);CHKERRQ(ierr);
1345       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1346     } else if (type == NORM_INFINITY) { /* max row norm */
1347       PetscReal ntemp = 0.0;
1348       for (j=0; j<aij->A->m; j++) {
1349         v = amat->a + amat->i[j];
1350         sum = 0.0;
1351         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1352           sum += PetscAbsScalar(*v); v++;
1353         }
1354         v = bmat->a + bmat->i[j];
1355         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1356           sum += PetscAbsScalar(*v); v++;
1357         }
1358         if (sum > ntemp) ntemp = sum;
1359       }
1360       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1361     } else {
1362       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1363     }
1364   }
1365   PetscFunctionReturn(0);
1366 }
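
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   MatNorm() on an assembled MPIAIJ matrix dispatches to the routine above.  As the
   code shows, NORM_FROBENIUS is the square root of the global sum of |a_ij|^2,
   NORM_1 is the maximum column sum, and NORM_INFINITY is the maximum row sum;
   NORM_2 is not supported in parallel.  Assuming a matrix A that is already assembled:

       PetscReal      nrm;
       PetscErrorCode ierr;

       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
       ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/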
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatTranspose_MPIAIJ"
1370 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1371 {
1372   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1373   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1374   PetscErrorCode ierr;
1375   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1376   Mat          B;
1377   PetscScalar  *array;
1378 
1379   PetscFunctionBegin;
1380   if (!matout && M != N) {
1381     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1382   }
1383 
1384   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1385   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1386   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1387 
1388   /* copy over the A part */
1389   Aloc = (Mat_SeqAIJ*)a->A->data;
1390   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1391   row = a->rstart;
1392   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1393   for (i=0; i<m; i++) {
1394     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1395     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1396   }
1397   aj = Aloc->j;
1398   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1399 
1400   /* copy over the B part */
1401   Aloc = (Mat_SeqAIJ*)a->B->data;
1402   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1403   row  = a->rstart;
1404   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1405   ct   = cols;
1406   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1407   for (i=0; i<m; i++) {
1408     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1409     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1410   }
1411   ierr = PetscFree(ct);CHKERRQ(ierr);
1412   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414   if (matout) {
1415     *matout = B;
1416   } else {
1417     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1424 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1425 {
1426   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1427   Mat        a = aij->A,b = aij->B;
1428   PetscErrorCode ierr;
1429   int        s1,s2,s3;
1430 
1431   PetscFunctionBegin;
1432   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1433   if (rr) {
1434     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1435     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1436     /* Overlap communication with computation. */
1437     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1438   }
1439   if (ll) {
1440     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1441     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1442     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1443   }
1444   /* scale  the diagonal block */
1445   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1446 
1447   if (rr) {
1448     /* Do a scatter end and then right scale the off-diagonal block */
1449     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1450     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1451   }
1452 
1453   PetscFunctionReturn(0);
1454 }
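
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   MatDiagonalScale(mat,ll,rr) computes diag(ll)*mat*diag(rr); either vector may be
   PETSC_NULL to scale on one side only.  The off-process part of rr is gathered into
   aij->lvec by the scatter above so the off-diagonal block can be column scaled.
   Assuming vectors l and r conforming to the local row and column sizes of A:

       ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);            left and right scaling
       ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);   right scaling only
*/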
1455 
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1459 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1460 {
1461   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1462   PetscErrorCode ierr;
1463 
1464   PetscFunctionBegin;
1465   if (!a->rank) {
1466     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1473 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1474 {
1475   PetscFunctionBegin;
1476   *bs = 1;
1477   PetscFunctionReturn(0);
1478 }
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1481 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1482 {
1483   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1484   PetscErrorCode ierr;
1485 
1486   PetscFunctionBegin;
1487   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatEqual_MPIAIJ"
1493 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1494 {
1495   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1496   Mat        a,b,c,d;
1497   PetscTruth flg;
1498   PetscErrorCode ierr;
1499 
1500   PetscFunctionBegin;
1501   a = matA->A; b = matA->B;
1502   c = matB->A; d = matB->B;
1503 
1504   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1505   if (flg == PETSC_TRUE) {
1506     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1507   }
1508   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatCopy_MPIAIJ"
1514 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1515 {
1516   PetscErrorCode ierr;
1517   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1518   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1519 
1520   PetscFunctionBegin;
1521   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1522   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1523     /* because of the column compression in the off-processor part of the matrix a->B,
1524        the number of columns in a->B and b->B may be different, hence we cannot call
1525        the MatCopy() directly on the two parts. If need be, we can provide a more
1526        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1527        then copying the submatrices */
1528     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1529   } else {
1530     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1531     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1538 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1539 {
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #include "petscblaslapack.h"
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatAXPY_MPIAIJ"
1550 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1551 {
1552   PetscErrorCode ierr;
1553   int        one=1,i;
1554   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1555   Mat_SeqAIJ *x,*y;
1556 
1557   PetscFunctionBegin;
1558   if (str == SAME_NONZERO_PATTERN) {
1559     x = (Mat_SeqAIJ *)xx->A->data;
1560     y = (Mat_SeqAIJ *)yy->A->data;
1561     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1562     x = (Mat_SeqAIJ *)xx->B->data;
1563     y = (Mat_SeqAIJ *)yy->B->data;
1564     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1565   } else if (str == SUBSET_NONZERO_PATTERN) {
1566     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1567 
1568     x = (Mat_SeqAIJ *)xx->B->data;
1569     y = (Mat_SeqAIJ *)yy->B->data;
1570     if (y->xtoy && y->XtoY != xx->B) {
1571       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1572       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1573     }
1574     if (!y->xtoy) { /* get xtoy */
1575       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1576       y->XtoY = xx->B;
1577     }
1578     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1579   } else {
1580     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1581   }
1582   PetscFunctionReturn(0);
1583 }
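
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   MatAXPY() computes Y = Y + a*X; the branches above use a fast BLAS axpy when the
   two matrices share the same nonzero pattern, an index-mapped update for a subset
   pattern, and fall back to MatAXPY_Basic() otherwise.

       PetscScalar alpha = 2.0;
       ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/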
1584 
1585 /* -------------------------------------------------------------------*/
1586 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1587        MatGetRow_MPIAIJ,
1588        MatRestoreRow_MPIAIJ,
1589        MatMult_MPIAIJ,
1590 /* 4*/ MatMultAdd_MPIAIJ,
1591        MatMultTranspose_MPIAIJ,
1592        MatMultTransposeAdd_MPIAIJ,
1593        0,
1594        0,
1595        0,
1596 /*10*/ 0,
1597        0,
1598        0,
1599        MatRelax_MPIAIJ,
1600        MatTranspose_MPIAIJ,
1601 /*15*/ MatGetInfo_MPIAIJ,
1602        MatEqual_MPIAIJ,
1603        MatGetDiagonal_MPIAIJ,
1604        MatDiagonalScale_MPIAIJ,
1605        MatNorm_MPIAIJ,
1606 /*20*/ MatAssemblyBegin_MPIAIJ,
1607        MatAssemblyEnd_MPIAIJ,
1608        0,
1609        MatSetOption_MPIAIJ,
1610        MatZeroEntries_MPIAIJ,
1611 /*25*/ MatZeroRows_MPIAIJ,
1612 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1613        MatLUFactorSymbolic_MPIAIJ_TFS,
1614 #else
1615        0,
1616 #endif
1617        0,
1618        0,
1619        0,
1620 /*30*/ MatSetUpPreallocation_MPIAIJ,
1621        0,
1622        0,
1623        0,
1624        0,
1625 /*35*/ MatDuplicate_MPIAIJ,
1626        0,
1627        0,
1628        0,
1629        0,
1630 /*40*/ MatAXPY_MPIAIJ,
1631        MatGetSubMatrices_MPIAIJ,
1632        MatIncreaseOverlap_MPIAIJ,
1633        MatGetValues_MPIAIJ,
1634        MatCopy_MPIAIJ,
1635 /*45*/ MatPrintHelp_MPIAIJ,
1636        MatScale_MPIAIJ,
1637        0,
1638        0,
1639        0,
1640 /*50*/ MatGetBlockSize_MPIAIJ,
1641        0,
1642        0,
1643        0,
1644        0,
1645 /*55*/ MatFDColoringCreate_MPIAIJ,
1646        0,
1647        MatSetUnfactored_MPIAIJ,
1648        0,
1649        0,
1650 /*60*/ MatGetSubMatrix_MPIAIJ,
1651        MatDestroy_MPIAIJ,
1652        MatView_MPIAIJ,
1653        MatGetPetscMaps_Petsc,
1654        0,
1655 /*65*/ 0,
1656        0,
1657        0,
1658        0,
1659        0,
1660 /*70*/ 0,
1661        0,
1662        MatSetColoring_MPIAIJ,
1663        MatSetValuesAdic_MPIAIJ,
1664        MatSetValuesAdifor_MPIAIJ,
1665 /*75*/ 0,
1666        0,
1667        0,
1668        0,
1669        0,
1670 /*80*/ 0,
1671        0,
1672        0,
1673        0,
1674 /*85*/ MatLoad_MPIAIJ,
1675        0,
1676        0,
1677        0,
1678        0,
1679 /*90*/ 0,
1680        MatMatMult_MPIAIJ_MPIAIJ,
1681        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1682        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1683 };
1684 
1685 /* ----------------------------------------------------------------------------------------*/
1686 
1687 EXTERN_C_BEGIN
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1690 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1691 {
1692   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1697   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 EXTERN_C_END
1701 
1702 EXTERN_C_BEGIN
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1705 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1706 {
1707   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1712   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1713   PetscFunctionReturn(0);
1714 }
1715 EXTERN_C_END
1716 
1717 #include "petscpc.h"
1718 EXTERN_C_BEGIN
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1721 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1722 {
1723   Mat_MPIAIJ   *b;
1724   PetscErrorCode ierr;
1725   int          i;
1726 
1727   PetscFunctionBegin;
1728   B->preallocated = PETSC_TRUE;
1729   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1730   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1731   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1732   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1733   if (d_nnz) {
1734     for (i=0; i<B->m; i++) {
1735       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1736     }
1737   }
1738   if (o_nnz) {
1739     for (i=0; i<B->m; i++) {
1740       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1741     }
1742   }
1743   b = (Mat_MPIAIJ*)B->data;
1744   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1745   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1746 
1747   PetscFunctionReturn(0);
1748 }
1749 EXTERN_C_END
1750 
1751 /*MC
1752    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1753 
1754    Options Database Keys:
1755 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1756 
1757   Level: beginner
1758 
1759 .seealso: MatCreateMPIAIJ
1760 M*/
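
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   the option key above takes effect when the type is left to MatSetFromOptions().
   A typical command-line driven setup might look like (sizes are generic):

       ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);     then run with -mat_type mpiaij
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/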
1761 
1762 EXTERN_C_BEGIN
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatCreate_MPIAIJ"
1765 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1766 {
1767   Mat_MPIAIJ *b;
1768   PetscErrorCode ierr;
1769   int        i,size;
1770 
1771   PetscFunctionBegin;
1772   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1773 
1774   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1775   B->data         = (void*)b;
1776   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1777   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1778   B->factor       = 0;
1779   B->assembled    = PETSC_FALSE;
1780   B->mapping      = 0;
1781 
1782   B->insertmode      = NOT_SET_VALUES;
1783   b->size            = size;
1784   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1785 
1786   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1787   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1788 
1789   /* the information in the maps duplicates the information computed below, eventually
1790      we should remove the duplicate information that is not contained in the maps */
1791   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1792   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1793 
1794   /* build local table of row and column ownerships */
1795   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1796   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1797   b->cowners = b->rowners + b->size + 2;
1798   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1799   b->rowners[0] = 0;
1800   for (i=2; i<=b->size; i++) {
1801     b->rowners[i] += b->rowners[i-1];
1802   }
1803   b->rstart = b->rowners[b->rank];
1804   b->rend   = b->rowners[b->rank+1];
1805   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1806   b->cowners[0] = 0;
1807   for (i=2; i<=b->size; i++) {
1808     b->cowners[i] += b->cowners[i-1];
1809   }
1810   b->cstart = b->cowners[b->rank];
1811   b->cend   = b->cowners[b->rank+1];
1812 
1813   /* build cache for off array entries formed */
1814   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1815   b->donotstash  = PETSC_FALSE;
1816   b->colmap      = 0;
1817   b->garray      = 0;
1818   b->roworiented = PETSC_TRUE;
1819 
1820   /* stuff used for matrix vector multiply */
1821   b->lvec      = PETSC_NULL;
1822   b->Mvctx     = PETSC_NULL;
1823 
1824   /* stuff for MatGetRow() */
1825   b->rowindices   = 0;
1826   b->rowvalues    = 0;
1827   b->getrowactive = PETSC_FALSE;
1828 
1829   /* Explicitly create 2 MATSEQAIJ matrices. */
1830   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1831   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1832   PetscLogObjectParent(B,b->A);
1833   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1834   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1835   PetscLogObjectParent(B,b->B);
1836 
1837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1838                                      "MatStoreValues_MPIAIJ",
1839                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1841                                      "MatRetrieveValues_MPIAIJ",
1842                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1844 				     "MatGetDiagonalBlock_MPIAIJ",
1845                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1847 				     "MatIsTranspose_MPIAIJ",
1848 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1850 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1851 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1853 				     "MatDiagonalScaleLocal_MPIAIJ",
1854 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 EXTERN_C_END
1858 
1859 /*MC
1860    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1861 
1862    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1863    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1864   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1865   for communicators controlling multiple processes.  It is recommended that you call both of
1866   the above preallocation routines for simplicity.
1867 
1868    Options Database Keys:
1869 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1870 
1871   Level: beginner
1872 
1873 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1874 M*/
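
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   following the recommendation above, calling both preallocation routines lets the
   same code run on one process (where the type resolves to MATSEQAIJ) and on many
   (MATMPIAIJ); the routine that does not match the actual type is simply a no-op.

       ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/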
1875 
1876 EXTERN_C_BEGIN
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatCreate_AIJ"
1879 PetscErrorCode MatCreate_AIJ(Mat A)
1880 {
1881   PetscErrorCode ierr;
1882   int size;
1883 
1884   PetscFunctionBegin;
1885   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1886   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1887   if (size == 1) {
1888     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1889   } else {
1890     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1891   }
1892   PetscFunctionReturn(0);
1893 }
1894 EXTERN_C_END
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1899 {
1900   Mat        mat;
1901   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   *newmat       = 0;
1906   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1907   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1908   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1909   a    = (Mat_MPIAIJ*)mat->data;
1910 
1911   mat->factor       = matin->factor;
1912   mat->assembled    = PETSC_TRUE;
1913   mat->insertmode   = NOT_SET_VALUES;
1914   mat->preallocated = PETSC_TRUE;
1915 
1916   a->rstart       = oldmat->rstart;
1917   a->rend         = oldmat->rend;
1918   a->cstart       = oldmat->cstart;
1919   a->cend         = oldmat->cend;
1920   a->size         = oldmat->size;
1921   a->rank         = oldmat->rank;
1922   a->donotstash   = oldmat->donotstash;
1923   a->roworiented  = oldmat->roworiented;
1924   a->rowindices   = 0;
1925   a->rowvalues    = 0;
1926   a->getrowactive = PETSC_FALSE;
1927 
1928   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1929   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1930   if (oldmat->colmap) {
1931 #if defined (PETSC_USE_CTABLE)
1932     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1933 #else
1934     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1935     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1936     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1937 #endif
1938   } else a->colmap = 0;
1939   if (oldmat->garray) {
1940     int len;
1941     len  = oldmat->B->n;
1942     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1943     PetscLogObjectMemory(mat,len*sizeof(int));
1944     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1945   } else a->garray = 0;
1946 
1947   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1948   PetscLogObjectParent(mat,a->lvec);
1949   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1950   PetscLogObjectParent(mat,a->Mvctx);
1951   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1952   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1953   PetscLogObjectParent(mat,a->A);
1954   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1955   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1956   PetscLogObjectParent(mat,a->B);
1957   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1958   *newmat = mat;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #include "petscsys.h"
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatLoad_MPIAIJ"
1966 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1967 {
1968   Mat          A;
1969   PetscScalar  *vals,*svals;
1970   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1971   MPI_Status   status;
1972   PetscErrorCode ierr;
1973   int          i,nz,j,rstart,rend,fd;
1974   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1975   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1976   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1977 
1978   PetscFunctionBegin;
1979   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1980   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1981   if (!rank) {
1982     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1983     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1984     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1985     if (header[3] < 0) {
1986       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1987     }
1988   }
1989 
1990   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1991   M = header[1]; N = header[2];
1992   /* determine ownership of all rows */
1993   m = M/size + ((M % size) > rank);
1994   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1995   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1996   rowners[0] = 0;
1997   for (i=2; i<=size; i++) {
1998     rowners[i] += rowners[i-1];
1999   }
2000   rstart = rowners[rank];
2001   rend   = rowners[rank+1];
2002 
2003   /* distribute row lengths to all processors */
2004   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2005   offlens = ourlens + (rend-rstart);
2006   if (!rank) {
2007     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2008     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2009     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2010     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2011     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2012     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2013   } else {
2014     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2015   }
2016 
2017   if (!rank) {
2018     /* calculate the number of nonzeros on each processor */
2019     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2020     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2021     for (i=0; i<size; i++) {
2022       for (j=rowners[i]; j< rowners[i+1]; j++) {
2023         procsnz[i] += rowlengths[j];
2024       }
2025     }
2026     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2027 
2028     /* determine max buffer needed and allocate it */
2029     maxnz = 0;
2030     for (i=0; i<size; i++) {
2031       maxnz = PetscMax(maxnz,procsnz[i]);
2032     }
2033     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2034 
2035     /* read in my part of the matrix column indices  */
2036     nz   = procsnz[0];
2037     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2038     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2039 
2040     /* read in everyone else's part and ship it off */
2041     for (i=1; i<size; i++) {
2042       nz   = procsnz[i];
2043       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2044       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2045     }
2046     ierr = PetscFree(cols);CHKERRQ(ierr);
2047   } else {
2048     /* determine buffer space needed for message */
2049     nz = 0;
2050     for (i=0; i<m; i++) {
2051       nz += ourlens[i];
2052     }
2053     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2054 
2055     /* receive message of column indices*/
2056     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2057     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2058     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2059   }
2060 
2061   /* determine column ownership if matrix is not square */
2062   if (N != M) {
2063     n      = N/size + ((N % size) > rank);
2064     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2065     cstart = cend - n;
2066   } else {
2067     cstart = rstart;
2068     cend   = rend;
2069     n      = cend - cstart;
2070   }
2071 
2072   /* loop over local rows, determining number of off diagonal entries */
2073   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2074   jj = 0;
2075   for (i=0; i<m; i++) {
2076     for (j=0; j<ourlens[i]; j++) {
2077       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2078       jj++;
2079     }
2080   }
2081 
2082   /* create our matrix */
2083   for (i=0; i<m; i++) {
2084     ourlens[i] -= offlens[i];
2085   }
2086   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2087   ierr = MatSetType(A,type);CHKERRQ(ierr);
2088   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2089 
2090   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2091   for (i=0; i<m; i++) {
2092     ourlens[i] += offlens[i];
2093   }
2094 
2095   if (!rank) {
2096     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2097 
2098     /* read in my part of the matrix numerical values  */
2099     nz   = procsnz[0];
2100     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2101 
2102     /* insert into matrix */
2103     jj      = rstart;
2104     smycols = mycols;
2105     svals   = vals;
2106     for (i=0; i<m; i++) {
2107       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2108       smycols += ourlens[i];
2109       svals   += ourlens[i];
2110       jj++;
2111     }
2112 
2113     /* read in other processors and ship out */
2114     for (i=1; i<size; i++) {
2115       nz   = procsnz[i];
2116       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2117       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2118     }
2119     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2120   } else {
2121     /* receive numeric values */
2122     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2123 
2124     /* receive message of values*/
2125     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2126     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2127     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2128 
2129     /* insert into matrix */
2130     jj      = rstart;
2131     smycols = mycols;
2132     svals   = vals;
2133     for (i=0; i<m; i++) {
2134       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2135       smycols += ourlens[i];
2136       svals   += ourlens[i];
2137       jj++;
2138     }
2139   }
2140   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2141   ierr = PetscFree(vals);CHKERRQ(ierr);
2142   ierr = PetscFree(mycols);CHKERRQ(ierr);
2143   ierr = PetscFree(rowners);CHKERRQ(ierr);
2144 
2145   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2146   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2147   *newmat = A;
2148   PetscFunctionReturn(0);
2149 }
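
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   the loader above is reached through MatLoad() with a binary viewer opened for
   reading; process 0 reads the header, row lengths, column indices and values and
   ships each piece to its owner.  Assuming an already opened binary PetscViewer fd:

       Mat A;
       ierr = MatLoad(fd,MATMPIAIJ,&A);CHKERRQ(ierr);
*/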
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2153 /*
2154     Not great since it makes two copies of the submatrix: first a SeqAIJ
2155   on each process, and then the end result obtained by concatenating the
2156   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2157 */
2158 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2159 {
2160   PetscErrorCode ierr;
2161   int          i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2162   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2163   Mat          *local,M,Mreuse;
2164   PetscScalar  *vwork,*aa;
2165   MPI_Comm     comm = mat->comm;
2166   Mat_SeqAIJ   *aij;
2167 
2168 
2169   PetscFunctionBegin;
2170   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2171   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2172 
2173   if (call ==  MAT_REUSE_MATRIX) {
2174     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2175     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2176     local = &Mreuse;
2177     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2178   } else {
2179     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2180     Mreuse = *local;
2181     ierr   = PetscFree(local);CHKERRQ(ierr);
2182   }
2183 
2184   /*
2185       m - number of local rows
2186       n - number of columns (same on all processors)
2187       rstart - first row in new global matrix generated
2188   */
2189   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2190   if (call == MAT_INITIAL_MATRIX) {
2191     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2192     ii  = aij->i;
2193     jj  = aij->j;
2194 
2195     /*
2196         Determine the number of non-zeros in the diagonal and off-diagonal
2197         portions of the matrix in order to do correct preallocation
2198     */
2199 
2200     /* first get start and end of "diagonal" columns */
2201     if (csize == PETSC_DECIDE) {
2202       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2203       if (mglobal == n) { /* square matrix */
2204 	nlocal = m;
2205       } else {
2206         nlocal = n/size + ((n % size) > rank);
2207       }
2208     } else {
2209       nlocal = csize;
2210     }
2211     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2212     rstart = rend - nlocal;
2213     if (rank == size - 1 && rend != n) {
2214       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2215     }
2216 
2217     /* next, compute all the lengths */
2218     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2219     olens = dlens + m;
2220     for (i=0; i<m; i++) {
2221       jend = ii[i+1] - ii[i];
2222       olen = 0;
2223       dlen = 0;
2224       for (j=0; j<jend; j++) {
2225         if (*jj < rstart || *jj >= rend) olen++;
2226         else dlen++;
2227         jj++;
2228       }
2229       olens[i] = olen;
2230       dlens[i] = dlen;
2231     }
2232     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2233     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2234     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2235     ierr = PetscFree(dlens);CHKERRQ(ierr);
2236   } else {
2237     int ml,nl;
2238 
2239     M = *newmat;
2240     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2241     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2242     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2243     /*
2244          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2245        rather than the slower MatSetValues().
2246     */
2247     M->was_assembled = PETSC_TRUE;
2248     M->assembled     = PETSC_FALSE;
2249   }
2250   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2251   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2252   ii  = aij->i;
2253   jj  = aij->j;
2254   aa  = aij->a;
2255   for (i=0; i<m; i++) {
2256     row   = rstart + i;
2257     nz    = ii[i+1] - ii[i];
2258     cwork = jj;     jj += nz;
2259     vwork = aa;     aa += nz;
2260     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2261   }
2262 
2263   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2264   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2265   *newmat = M;
2266 
2267   /* save submatrix used in processor for next request */
2268   if (call ==  MAT_INITIAL_MATRIX) {
2269     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2270     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2271   }
2272 
2273   PetscFunctionReturn(0);
2274 }
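
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   on the first call MAT_INITIAL_MATRIX builds the parallel submatrix and caches the
   local piece in the "SubMatrix" composed object; a later call with MAT_REUSE_MATRIX
   and the same index sets refills the previously returned matrix.  Assuming index
   sets isrow and iscol describing this process's share of the rows and columns:

       Mat sub;
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ... change values in mat, keeping the same nonzero pattern ...
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/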
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2278 /*@C
2279    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2280    (the default parallel PETSc format).  For good matrix assembly performance
2281    the user should preallocate the matrix storage by setting the parameters
2282    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2283    performance can be increased by more than a factor of 50.
2284 
2285    Collective on MPI_Comm
2286 
2287    Input Parameters:
2288 +  A - the matrix
2289 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2290            (same value is used for all local rows)
2291 .  d_nnz - array containing the number of nonzeros in the various rows of the
2292            DIAGONAL portion of the local submatrix (possibly different for each row)
2293            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2294            The size of this array is equal to the number of local rows, i.e 'm'.
2295            The size of this array is equal to the number of local rows, i.e. 'm'.
2296 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2297            submatrix (same value is used for all local rows).
2298 -  o_nnz - array containing the number of nonzeros in the various rows of the
2299            OFF-DIAGONAL portion of the local submatrix (possibly different for
2300            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2301            structure. The size of this array is equal to the number
2302            of local rows, i.e. 'm'.
2303 
2304    The AIJ format (also called the Yale sparse matrix format or
2305    compressed row storage) is fully compatible with standard Fortran 77
2306    storage.  That is, the stored row and column indices can begin at
2307    either one (as in Fortran) or zero.  See the users manual for details.
2308 
2309    The user MUST specify either the local or global matrix dimensions
2310    (possibly both).
2311 
2312    The parallel matrix is partitioned such that the first m0 rows belong to
2313    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2314    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2315    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2316    The DIAGONAL portion of the local submatrix of a processor can be defined
2317    as the submatrix which is obtained by extracting the part corresponding
2318    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2319    first row that belongs to the processor, and r2 is the last row belonging
2320    to this processor. This is a square mxm matrix. The remaining portion
2321    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2322 
2323    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2324 
2325    By default, this format uses inodes (identical nodes) when possible.
2326    We search for consecutive rows with the same nonzero structure, thereby
2327    reusing matrix information to achieve increased efficiency.
2328 
2329    Options Database Keys:
2330 +  -mat_aij_no_inode  - Do not use inodes
2331 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2332 -  -mat_aij_oneindex - Internally use indexing starting at 1
2333         rather than 0.  Note that when calling MatSetValues(),
2334         the user still MUST index entries starting at 0!
2335 
2336    Example usage:
2337 
2338    Consider the following 8x8 matrix with 34 non-zero values that is
2339    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2340    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2341    as follows:
2342 
2343 .vb
2344             1  2  0  |  0  3  0  |  0  4
2345     Proc0   0  5  6  |  7  0  0  |  8  0
2346             9  0 10  | 11  0  0  | 12  0
2347     -------------------------------------
2348            13  0 14  | 15 16 17  |  0  0
2349     Proc1   0 18  0  | 19 20 21  |  0  0
2350             0  0  0  | 22 23  0  | 24  0
2351     -------------------------------------
2352     Proc2  25 26 27  |  0  0 28  | 29  0
2353            30  0  0  | 31 32 33  |  0 34
2354 .ve
2355 
2356    This can be represented as a collection of submatrices as:
2357 
2358 .vb
2359       A B C
2360       D E F
2361       G H I
2362 .ve
2363 
2364    Where the submatrices A,B,C are owned by proc0, D,E,F are
2365    owned by proc1, G,H,I are owned by proc2.
2366 
2367    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2368    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2369    The 'M','N' parameters are 8,8, and have the same values on all procs.
2370 
2371    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2372    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2373    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2374    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2375    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2376    matrix, and [DF] as another SeqAIJ matrix.
2377 
2378    When d_nz, o_nz parameters are specified, d_nz storage elements are
2379    allocated for every row of the local diagonal submatrix, and o_nz
2380    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2381    One way to choose d_nz and o_nz is to use the max nonzeros per local
2382    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2383    In this case, the values of d_nz,o_nz are:
2384 .vb
2385      proc0 : d_nz = 2, o_nz = 2
2386      proc1 : d_nz = 3, o_nz = 2
2387      proc2 : d_nz = 1, o_nz = 4
2388 .ve
2389    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2390    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2391    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2392    34 values.
2393 
2394    When d_nnz, o_nnz parameters are specified, the storage is specified
2395    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2396    In the above case the values for d_nnz,o_nnz are:
2397 .vb
2398      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2399      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2400      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2401 .ve
2402    Here the space allocated is the sum of all the above values, i.e. 34, and
2403    hence the preallocation is perfect.
2404 
2405    Level: intermediate
2406 
2407 .keywords: matrix, aij, compressed row, sparse, parallel
2408 
2409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2410 @*/
2411 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2412 {
2413   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2414 
2415   PetscFunctionBegin;
2416   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2417   if (f) {
2418     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2419   }
2420   PetscFunctionReturn(0);
2421 }
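
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   preallocating proc1 of the 8x8 example in the manual page above, using the
   per-row arrays rather than the single d_nz/o_nz values:

       int d_nnz[3] = {3,3,2};
       int o_nnz[3] = {2,1,1};

       ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/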
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatCreateMPIAIJ"
2425 /*@C
2426    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2427    (the default parallel PETSc format).  For good matrix assembly performance
2428    the user should preallocate the matrix storage by setting the parameters
2429    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2430    performance can be increased by more than a factor of 50.
2431 
2432    Collective on MPI_Comm
2433 
2434    Input Parameters:
2435 +  comm - MPI communicator
2436 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2437            This value should be the same as the local size used in creating the
2438            y vector for the matrix-vector product y = Ax.
2439 .  n - This value should be the same as the local size used in creating the
2440        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2441        calculated if N is given) For square matrices n is almost always m.
2442 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2443 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2444 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2445            (same value is used for all local rows)
2446 .  d_nnz - array containing the number of nonzeros in the various rows of the
2447            DIAGONAL portion of the local submatrix (possibly different for each row)
2448            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2449            The size of this array is equal to the number of local rows, i.e. 'm'.
2450            You must leave room for the diagonal entry even if it is zero.
2451 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2452            submatrix (same value is used for all local rows).
2453 -  o_nnz - array containing the number of nonzeros in the various rows of the
2454            OFF-DIAGONAL portion of the local submatrix (possibly different for
2455            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2456            structure. The size of this array is equal to the number
2457            of local rows, i.e. 'm'.
2458 
2459    Output Parameter:
2460 .  A - the matrix
2461 
2462    Notes:
2463    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2464    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2465    storage requirements for this matrix.
2466 
2467    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2468    processor then it must be used on all processors that share the object for
2469    that argument.
2470 
2471    The AIJ format (also called the Yale sparse matrix format or
2472    compressed row storage) is fully compatible with standard Fortran 77
2473    storage.  That is, the stored row and column indices can begin at
2474    either one (as in Fortran) or zero.  See the users manual for details.
2475 
2476    The user MUST specify either the local or global matrix dimensions
2477    (possibly both).
2478 
2479    The parallel matrix is partitioned such that the first m0 rows belong to
2480    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2481    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2482    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2483    The DIAGONAL portion of the local submatrix of a processor can be defined
2484    as the submatrix which is obtained by extracting the part corresponding
2485    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2486    first row that belongs to the processor, and r2 is the last row belonging
2487    to this processor. This is a square mxm matrix. The remaining portion
2488    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2489 
2490    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2491 
2492    When calling this routine with a single process communicator, a matrix of
2493    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2494    type of communicator, use the construction mechanism:
2495      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2496 
2497    By default, this format uses inodes (identical nodes) when possible.
2498    We search for consecutive rows with the same nonzero structure, thereby
2499    reusing matrix information to achieve increased efficiency.
2500 
2501    Options Database Keys:
2502 +  -mat_aij_no_inode  - Do not use inodes
2503 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2504 -  -mat_aij_oneindex - Internally use indexing starting at 1
2505         rather than 0.  Note that when calling MatSetValues(),
2506         the user still MUST index entries starting at 0!
2507 
2508 
2509    Example usage:
2510 
2511    Consider the following 8x8 matrix with 34 non-zero values that is
2512    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2513    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2514    as follows:
2515 
2516 .vb
2517             1  2  0  |  0  3  0  |  0  4
2518     Proc0   0  5  6  |  7  0  0  |  8  0
2519             9  0 10  | 11  0  0  | 12  0
2520     -------------------------------------
2521            13  0 14  | 15 16 17  |  0  0
2522     Proc1   0 18  0  | 19 20 21  |  0  0
2523             0  0  0  | 22 23  0  | 24  0
2524     -------------------------------------
2525     Proc2  25 26 27  |  0  0 28  | 29  0
2526            30  0  0  | 31 32 33  |  0 34
2527 .ve
2528 
2529    This can be represented as a collection of submatrices as:
2530 
2531 .vb
2532       A B C
2533       D E F
2534       G H I
2535 .ve
2536 
2537    Where the submatrices A,B,C are owned by proc0, D,E,F are
2538    owned by proc1, G,H,I are owned by proc2.
2539 
2540    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2541    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2542    The 'M','N' parameters are 8,8, and have the same values on all procs.
2543 
2544    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2545    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2546    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2547    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2548    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2549    matrix, and [DF] as another SeqAIJ matrix.
2550 
2551    When d_nz, o_nz parameters are specified, d_nz storage elements are
2552    allocated for every row of the local diagonal submatrix, and o_nz
2553    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2554    One way to choose d_nz and o_nz is to use the max nonzeros per local
2555    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2556    In this case, the values of d_nz,o_nz are:
2557 .vb
2558      proc0 : d_nz = 2, o_nz = 2
2559      proc1 : d_nz = 3, o_nz = 2
2560      proc2 : d_nz = 1, o_nz = 4
2561 .ve
2562    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2563    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2564    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2565    34 values.
2566 
2567    When d_nnz, o_nnz parameters are specified, the storage is specified
2568    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2569    In the above case the values for d_nnz,o_nnz are:
2570 .vb
2571      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2572      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2573      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2574 .ve
2575    Here the space allocated is the sum of all the above values, i.e. 34, and
2576    hence the preallocation is perfect.
2577 
2578    Level: intermediate
2579 
2580 .keywords: matrix, aij, compressed row, sparse, parallel
2581 
2582 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2583 @*/
2584 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2585 {
2586   PetscErrorCode ierr;
2587   int size;
2588 
2589   PetscFunctionBegin;
2590   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2591   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2592   if (size > 1) {
2593     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2594     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2595   } else {
2596     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2597     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2598   }
2599   PetscFunctionReturn(0);
2600 }
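
/*
   Illustrative usage sketch (added for exposition, not part of the original source):
   the convenience routine above collapses the create/set-type/preallocate sequence
   into one call, and returns a MATSEQAIJ matrix on a one-process communicator:

       ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
*/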
2601 
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2604 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2605 {
2606   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2607   PetscFunctionBegin;
2608   *Ad     = a->A;
2609   *Ao     = a->B;
2610   *colmap = a->garray;
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2616 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2617 {
2618   PetscErrorCode ierr;
2619   int        i;
2620   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2621 
2622   PetscFunctionBegin;
2623   if (coloring->ctype == IS_COLORING_LOCAL) {
2624     ISColoringValue *allcolors,*colors;
2625     ISColoring      ocoloring;
2626 
2627     /* set coloring for diagonal portion */
2628     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2629 
2630     /* set coloring for off-diagonal portion */
2631     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2632     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2633     for (i=0; i<a->B->n; i++) {
2634       colors[i] = allcolors[a->garray[i]];
2635     }
2636     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2637     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2638     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2639     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2640   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2641     ISColoringValue *colors;
2642     int             *larray;
2643     ISColoring      ocoloring;
2644 
2645     /* set coloring for diagonal portion */
2646     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2647     for (i=0; i<a->A->n; i++) {
2648       larray[i] = i + a->cstart;
2649     }
2650     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2651     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2652     for (i=0; i<a->A->n; i++) {
2653       colors[i] = coloring->colors[larray[i]];
2654     }
2655     ierr = PetscFree(larray);CHKERRQ(ierr);
2656     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2657     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2658     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2659 
2660     /* set coloring for off-diagonal portion */
2661     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2662     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2663     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2664     for (i=0; i<a->B->n; i++) {
2665       colors[i] = coloring->colors[larray[i]];
2666     }
2667     ierr = PetscFree(larray);CHKERRQ(ierr);
2668     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2669     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2670     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2671   } else {
2672     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2673   }
2674 
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 #undef __FUNCT__
2679 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2680 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2681 {
2682   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2683   PetscErrorCode ierr;
2684 
2685   PetscFunctionBegin;
2686   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2687   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2693 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2694 {
2695   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2696   PetscErrorCode ierr;
2697 
2698   PetscFunctionBegin;
2699   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2700   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2701   PetscFunctionReturn(0);
2702 }
2703 
2704 #undef __FUNCT__
2705 #define __FUNCT__ "MatMerge"
2706 /*@C
2707       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2708                  matrices from each processor
2709 
2710     Collective on MPI_Comm
2711 
2712    Input Parameters:
2713 +    comm - the communicator the parallel matrix will live on
2714 .    inmat - the input sequential matrix (one per process)
2715 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2716 
2717    Output Parameter:
2718 .    outmat - the parallel matrix generated
2719 
2720     Level: advanced
2721 
2722    Notes: The number of columns of the matrix from EACH of the separate processes
2723       MUST be the same.
2724 
2725 @*/
2726 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat)
2727 {
2728   PetscErrorCode ierr;
2729   int               m,n,i,rstart,nnz,I,*dnz,*onz;
2730   const int         *indx;
2731   const PetscScalar *values;
2732   PetscMap          columnmap,rowmap;
2733 
2734   PetscFunctionBegin;
2735 
2736   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2737   if (scall == MAT_INITIAL_MATRIX){
2738     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2739     ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2740     ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2741     ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2742     ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2743     ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2744 
2745     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2746     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2747     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2748     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2749     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2750 
2751     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2752     for (i=0;i<m;i++) {
2753       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2754       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2755       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2756     }
2757     /* This routine will ONLY return MPIAIJ type matrix */
2758     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2759     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2760     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2761     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2762 
2763   } else if (scall == MAT_REUSE_MATRIX){
2764     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2765   } else {
2766     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2767   }
2768 
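  /*
     Second pass (used for both MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX): copy the rows
     of the sequential input matrix into the parallel matrix, shifting each local row
     index i by rstart to obtain its global row number.  Every row inserted here is
     locally owned, so the assembly below moves no matrix values between processes.
  */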
2769   for (i=0;i<m;i++) {
2770     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2771     I    = i + rstart;
2772     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2773     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2774   }
2775   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2776   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2777   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2816   PetscFunctionReturn(0);
2817 }
2818 
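/*
   MatFileSplit - Copies each process's locally owned rows of the parallel matrix A
   into its own sequential (SEQAIJ) matrix and writes that matrix in PETSc binary
   format to the file "<outfile>.<rank>", one file per MPI process.
*/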
2819 #undef __FUNCT__
2820 #define __FUNCT__ "MatFileSplit"
2821 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2822 {
2823   PetscErrorCode ierr;
2824   int               rank,m,N,i,rstart,nnz;
2825   size_t            len;
2826   const int         *indx;
2827   PetscViewer       out;
2828   char              *name;
2829   Mat               B;
2830   const PetscScalar *values;
2831 
2832   PetscFunctionBegin;
2833 
2834   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2835   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2836   /* Should this be the type of the diagonal block of A? */
2837   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2838   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2839   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2840   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2841   for (i=0;i<m;i++) {
2842     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2843     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2844     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2845   }
2846   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2847   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2848 
2849   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2850   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2851   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for '.', the rank digits, and the null terminator */
2852   sprintf(name,"%s.%d",outfile,rank);
2853   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2854   ierr = PetscFree(name);CHKERRQ(ierr);
2855   ierr = MatView(B,out);CHKERRQ(ierr);
2856   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2857   ierr = MatDestroy(B);CHKERRQ(ierr);
2858   PetscFunctionReturn(0);
2859 }
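/*
   Illustrative usage sketch (the file name is hypothetical): splitting a parallel
   matrix A into per-process binary files that can later be loaded independently,

     ierr = MatFileSplit(A,"matrix.dat");CHKERRQ(ierr);

   writes matrix.dat.0, matrix.dat.1, ..., one file for each rank in A's communicator.
*/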
2860