xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7c2000337a446092e4e06a20f2ff73813d334fd7)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.302 1999/10/01 21:21:16 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  This is done in a non scable way since the
19 length of colmap equals the global matrix length.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for ( i=0; i<n; i++ ){
33     ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for ( _i=0; _i<nrow; _i++ ) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
78         new_j   = (int *) (new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
83         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = 1; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for ( ii=N; ii>=_i; ii-- ) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNC__
194 #define __FUNC__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
198   Scalar     value;
199   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
200   int        cstart = aij->cstart, cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
206   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208 
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
211   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for ( i=0; i<m; i++ ) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for ( j=0; j<n; j++ ) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233         else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ *) B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (PETSC_USE_CTABLE)
301           ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
302           col --;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
322 {
323   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
324   int         ierr,nstash,reallocs;
325   InsertMode  addv;
326 
327   PetscFunctionBegin;
328   if (aij->donotstash) {
329     PetscFunctionReturn(0);
330   }
331 
332   /* make sure all processors are either in INSERTMODE or ADDMODE */
333   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
334   if (addv == (ADD_VALUES|INSERT_VALUES)) {
335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
336   }
337   mat->insertmode = addv; /* in case this processor had no cache */
338 
339   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
340   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
341   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
342   PetscFunctionReturn(0);
343 }
344 
345 
346 #undef __FUNC__
347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
349 {
350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
351   int         i,j,rstart,ncols,n,ierr,flg;
352   int         *row,*col,other_disassembled;
353   Scalar      *val;
354   InsertMode  addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for ( i=0; i<n; ) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled, if so we must
379      also disassemble ourselfs, in order that we may reassemble. */
380   /*
381      if nonzero structure of submatrix B cannot change then we know that
382      no processor disassembled thus we can skip this stuff
383   */
384   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388     }
389   }
390 
391   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
392     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
393   }
394   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
396 
397   if (aij->rowvalues) {
398     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
399     aij->rowvalues = 0;
400   }
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
418 #define __FUNC__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
422   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
423   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431 
432   PetscFunctionBegin;
433   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
434   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
435 
436   /*  first count number of contributors to each processor */
437   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
438   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
439   procs  = nprocs + size;
440   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
441   for ( i=0; i<N; i++ ) {
442     idx = rows[i];
443     found = 0;
444     for ( j=0; j<size; j++ ) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
450   }
451   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
452 
453   /* inform other processors of number of messages and max length*/
454   work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
455   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
456   nrecvs = work[rank];
457   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
458   nmax = work[rank];
459   ierr = PetscFree(work);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
463   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
464   for ( i=0; i<nrecvs; i++ ) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
473   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
474   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
475   starts[0] = 0;
476   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
477   for ( i=0; i<N; i++ ) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ISRestoreIndices(is,&rows);
481 
482   starts[0] = 0;
483   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
484   count = 0;
485   for ( i=0; i<size; i++ ) {
486     if (procs[i]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]  = n;
504     slen += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
511   count = 0;
512   for ( i=0; i<nrecvs; i++ ) {
513     values = rvalues + i*nmax;
514     for ( j=0; j<lens[i]; j++ ) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal we use seperate
530      code so that MatSetValues() is not called for each diagonal allocating
531      new memory, thus calling lots of mallocs and slowing things down.
532 
533        Contributed by: Mathew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for ( i = 0; i < slen; i++ ) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
560     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNC__
570 #define __FUNC__ "MatMult_MPIAIJ"
571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574   int        ierr,nt;
575 
576   PetscFunctionBegin;
577   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
578   if (nt != a->n) {
579     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
580   }
581   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
582   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
583   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatMultAdd_MPIAIJ"
590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
591 {
592   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
593   int        ierr;
594 
595   PetscFunctionBegin;
596   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
598   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatMultTrans_MPIAIJ"
605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
606 {
607   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
608   int        ierr;
609 
610   PetscFunctionBegin;
611   /* do nondiagonal part */
612   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
613   /* send it on its way */
614   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
615   /* do local part */
616   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
617   /* receive remote parts: note this assumes the values are not actually */
618   /* inserted in yy until the next line, which is true for my implementation*/
619   /* but is not perhaps always true. */
620   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNC__
625 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
627 {
628   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
629   int        ierr;
630 
631   PetscFunctionBegin;
632   /* do nondiagonal part */
633   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
634   /* send it on its way */
635   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
636   /* do local part */
637   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
638   /* receive remote parts: note this assumes the values are not actually */
639   /* inserted in yy until the next line, which is true for my implementation*/
640   /* but is not perhaps always true. */
641   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
642   PetscFunctionReturn(0);
643 }
644 
645 /*
646   This only works correctly for square matrices where the subblock A->A is the
647    diagonal block
648 */
649 #undef __FUNC__
650 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
652 {
653   int        ierr;
654   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
655 
656   PetscFunctionBegin;
657   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
658   if (a->rstart != a->cstart || a->rend != a->cend) {
659     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
660   }
661   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatScale_MPIAIJ"
667 int MatScale_MPIAIJ(Scalar *aa,Mat A)
668 {
669   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
670   int        ierr;
671 
672   PetscFunctionBegin;
673   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
674   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
675   PetscFunctionReturn(0);
676 }
677 
678 #undef __FUNC__
679 #define __FUNC__ "MatDestroy_MPIAIJ"
680 int MatDestroy_MPIAIJ(Mat mat)
681 {
682   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
683   int        ierr;
684 
685   PetscFunctionBegin;
686 
687   if (mat->mapping) {
688     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
689   }
690   if (mat->bmapping) {
691     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
692   }
693   if (mat->rmap) {
694     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
695   }
696   if (mat->cmap) {
697     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
698   }
699 #if defined(PETSC_USE_LOG)
700   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
701 #endif
702   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
703   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
704   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
705   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
706 #if defined (PETSC_USE_CTABLE)
707   if (aij->colmap) TableDelete(aij->colmap);
708 #else
709   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
710 #endif
711   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
712   if (aij->lvec)   VecDestroy(aij->lvec);
713   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
714   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
715   ierr = PetscFree(aij);CHKERRQ(ierr);
716   PLogObjectDestroy(mat);
717   PetscHeaderDestroy(mat);
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNC__
722 #define __FUNC__ "MatView_MPIAIJ_Binary"
723 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
724 {
725   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
726   int         ierr;
727 
728   PetscFunctionBegin;
729   if (aij->size == 1) {
730     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
731   }
732   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
733   PetscFunctionReturn(0);
734 }
735 
736 #undef __FUNC__
737 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
738 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
739 {
740   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
741   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
742   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
743   int         size = aij->size;
744   FILE        *fd;
745 
746   PetscFunctionBegin;
747   if (PetscTypeCompare(viewer,ASCII_VIEWER)) {
748     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
749     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
750       MatInfo info;
751       int     flg;
752       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
753       ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
754       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
755       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
756       ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
757       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
758          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
759       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
760          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
761       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
762       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
763       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
764       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
765       fflush(fd);
766       ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
767       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
768       PetscFunctionReturn(0);
769     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
770       PetscFunctionReturn(0);
771     }
772   } else if (PetscTypeCompare(viewer,DRAW_VIEWER)) {
773     Draw       draw;
774     PetscTruth isnull;
775     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
776     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
777   }
778 
779   if (size == 1) {
780     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
781   } else {
782     /* assemble the entire matrix onto first processor. */
783     Mat         A;
784     Mat_SeqAIJ *Aloc;
785     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
786     Scalar      *a;
787 
788     if (!rank) {
789       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
790     } else {
791       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
792     }
793     PLogObjectParent(mat,A);
794 
795     /* copy over the A part */
796     Aloc = (Mat_SeqAIJ*) aij->A->data;
797     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
798     row = aij->rstart;
799     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
800     for ( i=0; i<m; i++ ) {
801       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
802       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
803     }
804     aj = Aloc->j;
805     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
806 
807     /* copy over the B part */
808     Aloc = (Mat_SeqAIJ*) aij->B->data;
809     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
810     row = aij->rstart;
811     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
812     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
813     for ( i=0; i<m; i++ ) {
814       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
815       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
816     }
817     ierr = PetscFree(ct);CHKERRQ(ierr);
818     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820     /*
821        Everyone has to call to draw the matrix since the graphics waits are
822        synchronized across all processors that share the Draw object
823     */
824     if (!rank || PetscTypeCompare(viewer,DRAW_VIEWER)) {
825       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
826     }
827     ierr = MatDestroy(A);CHKERRQ(ierr);
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNC__
833 #define __FUNC__ "MatView_MPIAIJ"
834 int MatView_MPIAIJ(Mat mat,Viewer viewer)
835 {
836   int         ierr;
837 
838   PetscFunctionBegin;
839   if (PetscTypeCompare(viewer,ASCII_VIEWER) || PetscTypeCompare(viewer,DRAW_VIEWER) ||
840       PetscTypeCompare(viewer,SOCKET_VIEWER) || PetscTypeCompare(viewer,BINARY_VIEWER)) {
841     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
842     /*
843   } else if (PetscTypeCompare(viewer,BINARY_VIEWER)) {
844     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
845     */
846   } else {
847     SETERRQ(1,1,"Viewer type not supported by PETSc object");
848   }
849   PetscFunctionReturn(0);
850 }
851 
852 /*
853     This has to provide several versions.
854 
855      2) a) use only local smoothing updating outer values only once.
856         b) local smoothing updating outer values each inner iteration
857      3) color updating out values betwen colors.
858 */
859 #undef __FUNC__
860 #define __FUNC__ "MatRelax_MPIAIJ"
861 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
862                            double fshift,int its,Vec xx)
863 {
864   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
865   Mat        AA = mat->A, BB = mat->B;
866   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
867   Scalar     *b,*x,*xs,*ls,d,*v,sum;
868   int        ierr,*idx, *diag;
869   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
870 
871   PetscFunctionBegin;
872   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
873   diag = A->diag;
874   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
875     if (flag & SOR_ZERO_INITIAL_GUESS) {
876       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
877       PetscFunctionReturn(0);
878     }
879     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
880     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
882     if (xx != bb) {
883       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
884     } else {
885       b = x;
886     }
887     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
888     xs = x + shift; /* shift by one for index start of 1 */
889     ls = ls + shift;
890     while (its--) {
891       /* go down through the rows */
892       for ( i=0; i<m; i++ ) {
893         n    = A->i[i+1] - A->i[i];
894 	PLogFlops(4*n+3);
895         idx  = A->j + A->i[i] + shift;
896         v    = A->a + A->i[i] + shift;
897         sum  = b[i];
898         SPARSEDENSEMDOT(sum,xs,v,idx,n);
899         d    = fshift + A->a[diag[i]+shift];
900         n    = B->i[i+1] - B->i[i];
901         idx  = B->j + B->i[i] + shift;
902         v    = B->a + B->i[i] + shift;
903         SPARSEDENSEMDOT(sum,ls,v,idx,n);
904         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
905       }
906       /* come up through the rows */
907       for ( i=m-1; i>-1; i-- ) {
908         n    = A->i[i+1] - A->i[i];
909 	PLogFlops(4*n+3)
910         idx  = A->j + A->i[i] + shift;
911         v    = A->a + A->i[i] + shift;
912         sum  = b[i];
913         SPARSEDENSEMDOT(sum,xs,v,idx,n);
914         d    = fshift + A->a[diag[i]+shift];
915         n    = B->i[i+1] - B->i[i];
916         idx  = B->j + B->i[i] + shift;
917         v    = B->a + B->i[i] + shift;
918         SPARSEDENSEMDOT(sum,ls,v,idx,n);
919         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
920       }
921     }
922     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
923     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
924     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
925   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
926     if (flag & SOR_ZERO_INITIAL_GUESS) {
927       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
928       PetscFunctionReturn(0);
929     }
930     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
931     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
932     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
933     if (xx != bb) {
934       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
935     } else {
936       b = x;
937     }
938     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
939     xs = x + shift; /* shift by one for index start of 1 */
940     ls = ls + shift;
941     while (its--) {
942       for ( i=0; i<m; i++ ) {
943         n    = A->i[i+1] - A->i[i];
944 	PLogFlops(4*n+3);
945         idx  = A->j + A->i[i] + shift;
946         v    = A->a + A->i[i] + shift;
947         sum  = b[i];
948         SPARSEDENSEMDOT(sum,xs,v,idx,n);
949         d    = fshift + A->a[diag[i]+shift];
950         n    = B->i[i+1] - B->i[i];
951         idx  = B->j + B->i[i] + shift;
952         v    = B->a + B->i[i] + shift;
953         SPARSEDENSEMDOT(sum,ls,v,idx,n);
954         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
955       }
956     }
957     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
958     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
959     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
960   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
961     if (flag & SOR_ZERO_INITIAL_GUESS) {
962       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
963       PetscFunctionReturn(0);
964     }
965     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
966     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
967     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
968     if (xx != bb) {
969       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
970     } else {
971       b = x;
972     }
973     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
974     xs = x + shift; /* shift by one for index start of 1 */
975     ls = ls + shift;
976     while (its--) {
977       for ( i=m-1; i>-1; i-- ) {
978         n    = A->i[i+1] - A->i[i];
979 	PLogFlops(4*n+3);
980         idx  = A->j + A->i[i] + shift;
981         v    = A->a + A->i[i] + shift;
982         sum  = b[i];
983         SPARSEDENSEMDOT(sum,xs,v,idx,n);
984         d    = fshift + A->a[diag[i]+shift];
985         n    = B->i[i+1] - B->i[i];
986         idx  = B->j + B->i[i] + shift;
987         v    = B->a + B->i[i] + shift;
988         SPARSEDENSEMDOT(sum,ls,v,idx,n);
989         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
990       }
991     }
992     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
993     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
994     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
995   } else {
996     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNC__
1002 #define __FUNC__ "MatGetInfo_MPIAIJ"
1003 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1004 {
1005   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1006   Mat        A = mat->A, B = mat->B;
1007   int        ierr;
1008   double     isend[5], irecv[5];
1009 
1010   PetscFunctionBegin;
1011   info->block_size     = 1.0;
1012   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1013   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1014   isend[3] = info->memory;  isend[4] = info->mallocs;
1015   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1016   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1017   isend[3] += info->memory;  isend[4] += info->mallocs;
1018   if (flag == MAT_LOCAL) {
1019     info->nz_used      = isend[0];
1020     info->nz_allocated = isend[1];
1021     info->nz_unneeded  = isend[2];
1022     info->memory       = isend[3];
1023     info->mallocs      = isend[4];
1024   } else if (flag == MAT_GLOBAL_MAX) {
1025     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1026     info->nz_used      = irecv[0];
1027     info->nz_allocated = irecv[1];
1028     info->nz_unneeded  = irecv[2];
1029     info->memory       = irecv[3];
1030     info->mallocs      = irecv[4];
1031   } else if (flag == MAT_GLOBAL_SUM) {
1032     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1033     info->nz_used      = irecv[0];
1034     info->nz_allocated = irecv[1];
1035     info->nz_unneeded  = irecv[2];
1036     info->memory       = irecv[3];
1037     info->mallocs      = irecv[4];
1038   }
1039   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1040   info->fill_ratio_needed = 0;
1041   info->factor_mallocs    = 0;
1042   info->rows_global       = (double)mat->M;
1043   info->columns_global    = (double)mat->N;
1044   info->rows_local        = (double)mat->m;
1045   info->columns_local     = (double)mat->N;
1046 
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 #undef __FUNC__
1051 #define __FUNC__ "MatSetOption_MPIAIJ"
1052 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1055   int        ierr;
1056 
1057   PetscFunctionBegin;
1058   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1059       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1060       op == MAT_COLUMNS_UNSORTED ||
1061       op == MAT_COLUMNS_SORTED ||
1062       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1063       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1064         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1065         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1066   } else if (op == MAT_ROW_ORIENTED) {
1067     a->roworiented = 1;
1068     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1069     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1070   } else if (op == MAT_ROWS_SORTED ||
1071              op == MAT_ROWS_UNSORTED ||
1072              op == MAT_SYMMETRIC ||
1073              op == MAT_STRUCTURALLY_SYMMETRIC ||
1074              op == MAT_YES_NEW_DIAGONALS)
1075     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1076   else if (op == MAT_COLUMN_ORIENTED) {
1077     a->roworiented = 0;
1078     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1079     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1080   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1081     a->donotstash = 1;
1082   } else if (op == MAT_NO_NEW_DIAGONALS){
1083     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1084   } else {
1085     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNC__
1091 #define __FUNC__ "MatGetSize_MPIAIJ"
1092 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1093 {
1094   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1095 
1096   PetscFunctionBegin;
1097   if (m) *m = mat->M;
1098   if (n) *n = mat->N;
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNC__
1103 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1104 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1105 {
1106   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1107 
1108   PetscFunctionBegin;
1109   if (m) *m = mat->m;
1110   if (n) *n = mat->n;
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNC__
1115 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1116 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1117 {
1118   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1119 
1120   PetscFunctionBegin;
1121   *m = mat->rstart; *n = mat->rend;
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNC__
1126 #define __FUNC__ "MatGetRow_MPIAIJ"
1127 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1128 {
1129   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1130   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1131   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1132   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1133   int        *cmap, *idx_p;
1134 
1135   PetscFunctionBegin;
1136   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1137   mat->getrowactive = PETSC_TRUE;
1138 
1139   if (!mat->rowvalues && (idx || v)) {
1140     /*
1141         allocate enough space to hold information from the longest row.
1142     */
1143     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1144     int     max = 1,m = mat->m,tmp;
1145     for ( i=0; i<m; i++ ) {
1146       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1147       if (max < tmp) { max = tmp; }
1148     }
1149     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1150     mat->rowindices = (int *) (mat->rowvalues + max);
1151   }
1152 
1153   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1154   lrow = row - rstart;
1155 
1156   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1157   if (!v)   {pvA = 0; pvB = 0;}
1158   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1159   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1160   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1161   nztot = nzA + nzB;
1162 
1163   cmap  = mat->garray;
1164   if (v  || idx) {
1165     if (nztot) {
1166       /* Sort by increasing column numbers, assuming A and B already sorted */
1167       int imark = -1;
1168       if (v) {
1169         *v = v_p = mat->rowvalues;
1170         for ( i=0; i<nzB; i++ ) {
1171           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1172           else break;
1173         }
1174         imark = i;
1175         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1176         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1177       }
1178       if (idx) {
1179         *idx = idx_p = mat->rowindices;
1180         if (imark > -1) {
1181           for ( i=0; i<imark; i++ ) {
1182             idx_p[i] = cmap[cworkB[i]];
1183           }
1184         } else {
1185           for ( i=0; i<nzB; i++ ) {
1186             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1187             else break;
1188           }
1189           imark = i;
1190         }
1191         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1192         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1193       }
1194     } else {
1195       if (idx) *idx = 0;
1196       if (v)   *v   = 0;
1197     }
1198   }
1199   *nz = nztot;
1200   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1201   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNC__
1206 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1207 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1208 {
1209   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1210 
1211   PetscFunctionBegin;
1212   if (aij->getrowactive == PETSC_FALSE) {
1213     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1214   }
1215   aij->getrowactive = PETSC_FALSE;
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNC__
1220 #define __FUNC__ "MatNorm_MPIAIJ"
1221 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1222 {
1223   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1224   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1225   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1226   double     sum = 0.0;
1227   Scalar     *v;
1228 
1229   PetscFunctionBegin;
1230   if (aij->size == 1) {
1231     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1232   } else {
1233     if (type == NORM_FROBENIUS) {
1234       v = amat->a;
1235       for (i=0; i<amat->nz; i++ ) {
1236 #if defined(PETSC_USE_COMPLEX)
1237         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1238 #else
1239         sum += (*v)*(*v); v++;
1240 #endif
1241       }
1242       v = bmat->a;
1243       for (i=0; i<bmat->nz; i++ ) {
1244 #if defined(PETSC_USE_COMPLEX)
1245         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1246 #else
1247         sum += (*v)*(*v); v++;
1248 #endif
1249       }
1250       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1251       *norm = sqrt(*norm);
1252     } else if (type == NORM_1) { /* max column norm */
1253       double *tmp, *tmp2;
1254       int    *jj, *garray = aij->garray;
1255       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1256       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1257       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1258       *norm = 0.0;
1259       v = amat->a; jj = amat->j;
1260       for ( j=0; j<amat->nz; j++ ) {
1261         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1262       }
1263       v = bmat->a; jj = bmat->j;
1264       for ( j=0; j<bmat->nz; j++ ) {
1265         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1266       }
1267       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1268       for ( j=0; j<aij->N; j++ ) {
1269         if (tmp2[j] > *norm) *norm = tmp2[j];
1270       }
1271       ierr = PetscFree(tmp);CHKERRQ(ierr);
1272       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1273     } else if (type == NORM_INFINITY) { /* max row norm */
1274       double ntemp = 0.0;
1275       for ( j=0; j<amat->m; j++ ) {
1276         v = amat->a + amat->i[j] + shift;
1277         sum = 0.0;
1278         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1279           sum += PetscAbsScalar(*v); v++;
1280         }
1281         v = bmat->a + bmat->i[j] + shift;
1282         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1283           sum += PetscAbsScalar(*v); v++;
1284         }
1285         if (sum > ntemp) ntemp = sum;
1286       }
1287       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1288     } else {
1289       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1290     }
1291   }
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNC__
1296 #define __FUNC__ "MatTranspose_MPIAIJ"
1297 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1298 {
1299   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1300   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1301   int        ierr,shift = Aloc->indexshift;
1302   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1303   Mat        B;
1304   Scalar     *array;
1305 
1306   PetscFunctionBegin;
1307   if (matout == PETSC_NULL && M != N) {
1308     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1309   }
1310 
1311   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1312 
1313   /* copy over the A part */
1314   Aloc = (Mat_SeqAIJ*) a->A->data;
1315   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1316   row = a->rstart;
1317   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1318   for ( i=0; i<m; i++ ) {
1319     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1320     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1321   }
1322   aj = Aloc->j;
1323   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1324 
1325   /* copy over the B part */
1326   Aloc = (Mat_SeqAIJ*) a->B->data;
1327   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1328   row = a->rstart;
1329   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1330   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1331   for ( i=0; i<m; i++ ) {
1332     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1333     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1334   }
1335   ierr = PetscFree(ct);CHKERRQ(ierr);
1336   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1337   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1338   if (matout != PETSC_NULL) {
1339     *matout = B;
1340   } else {
1341     PetscOps       *Abops;
1342     struct _MatOps *Aops;
1343 
1344     /* This isn't really an in-place transpose .... but free data structures from a */
1345     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1346     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1347     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1348 #if defined (PETSC_USE_CTABLE)
1349     if (a->colmap) TableDelete(a->colmap);
1350 #else
1351     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1352 #endif
1353     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1354     if (a->lvec) VecDestroy(a->lvec);
1355     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1356     ierr = PetscFree(a);CHKERRQ(ierr);
1357 
1358     /*
1359        This is horrible, horrible code. We need to keep the
1360       A pointers for the bops and ops but copy everything
1361       else from C.
1362     */
1363     Abops   = A->bops;
1364     Aops    = A->ops;
1365     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1366     A->bops = Abops;
1367     A->ops  = Aops;
1368     PetscHeaderDestroy(B);
1369   }
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNC__
1374 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1375 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1376 {
1377   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1378   Mat        a = aij->A, b = aij->B;
1379   int        ierr,s1,s2,s3;
1380 
1381   PetscFunctionBegin;
1382   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1383   if (rr) {
1384     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1385     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1386     /* Overlap communication with computation. */
1387     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1388   }
1389   if (ll) {
1390     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1391     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1392     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1393   }
1394   /* scale  the diagonal block */
1395   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1396 
1397   if (rr) {
1398     /* Do a scatter end and then right scale the off-diagonal block */
1399     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1400     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1401   }
1402 
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 
1407 #undef __FUNC__
1408 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1409 int MatPrintHelp_MPIAIJ(Mat A)
1410 {
1411   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1412   int        ierr;
1413 
1414   PetscFunctionBegin;
1415   if (!a->rank) {
1416     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNC__
1422 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1423 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1424 {
1425   PetscFunctionBegin;
1426   *bs = 1;
1427   PetscFunctionReturn(0);
1428 }
1429 #undef __FUNC__
1430 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1431 int MatSetUnfactored_MPIAIJ(Mat A)
1432 {
1433   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1434   int        ierr;
1435 
1436   PetscFunctionBegin;
1437   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNC__
1442 #define __FUNC__ "MatEqual_MPIAIJ"
1443 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1444 {
1445   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1446   Mat        a, b, c, d;
1447   PetscTruth flg;
1448   int        ierr;
1449 
1450   PetscFunctionBegin;
1451   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1452   a = matA->A; b = matA->B;
1453   c = matB->A; d = matB->B;
1454 
1455   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1456   if (flg == PETSC_TRUE) {
1457     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1458   }
1459   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNC__
1464 #define __FUNC__ "MatCopy_MPIAIJ"
1465 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1466 {
1467   int        ierr;
1468   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1469   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1470 
1471   PetscFunctionBegin;
1472   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1473     /* because of the column compression in the off-processor part of the matrix a->B,
1474        the number of columns in a->B and b->B may be different, hence we cannot call
1475        the MatCopy() directly on the two parts. If need be, we can provide a more
1476        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1477        then copying the submatrices */
1478     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1479   } else {
1480     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1481     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1487 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1488 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1489 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1490 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1491 
1492 /* -------------------------------------------------------------------*/
1493 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1494        MatGetRow_MPIAIJ,
1495        MatRestoreRow_MPIAIJ,
1496        MatMult_MPIAIJ,
1497        MatMultAdd_MPIAIJ,
1498        MatMultTrans_MPIAIJ,
1499        MatMultTransAdd_MPIAIJ,
1500        0,
1501        0,
1502        0,
1503        0,
1504        0,
1505        0,
1506        MatRelax_MPIAIJ,
1507        MatTranspose_MPIAIJ,
1508        MatGetInfo_MPIAIJ,
1509        MatEqual_MPIAIJ,
1510        MatGetDiagonal_MPIAIJ,
1511        MatDiagonalScale_MPIAIJ,
1512        MatNorm_MPIAIJ,
1513        MatAssemblyBegin_MPIAIJ,
1514        MatAssemblyEnd_MPIAIJ,
1515        0,
1516        MatSetOption_MPIAIJ,
1517        MatZeroEntries_MPIAIJ,
1518        MatZeroRows_MPIAIJ,
1519        0,
1520        0,
1521        0,
1522        0,
1523        MatGetSize_MPIAIJ,
1524        MatGetLocalSize_MPIAIJ,
1525        MatGetOwnershipRange_MPIAIJ,
1526        0,
1527        0,
1528        0,
1529        0,
1530        MatDuplicate_MPIAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535        0,
1536        MatGetSubMatrices_MPIAIJ,
1537        MatIncreaseOverlap_MPIAIJ,
1538        MatGetValues_MPIAIJ,
1539        MatCopy_MPIAIJ,
1540        MatPrintHelp_MPIAIJ,
1541        MatScale_MPIAIJ,
1542        0,
1543        0,
1544        0,
1545        MatGetBlockSize_MPIAIJ,
1546        0,
1547        0,
1548        0,
1549        0,
1550        MatFDColoringCreate_MPIAIJ,
1551        0,
1552        MatSetUnfactored_MPIAIJ,
1553        0,
1554        0,
1555        MatGetSubMatrix_MPIAIJ,
1556        0,
1557        0,
1558        MatGetMaps_Petsc};
1559 
1560 /* ----------------------------------------------------------------------------------------*/
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNC__
1564 #define __FUNC__ "MatStoreValues_MPIAIJ"
1565 int MatStoreValues_MPIAIJ(Mat mat)
1566 {
1567   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1568   int        ierr;
1569 
1570   PetscFunctionBegin;
1571   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1572   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 EXTERN_C_END
1576 
1577 EXTERN_C_BEGIN
1578 #undef __FUNC__
1579 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1580 int MatRetrieveValues_MPIAIJ(Mat mat)
1581 {
1582   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1583   int        ierr;
1584 
1585   PetscFunctionBegin;
1586   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1587   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1588   PetscFunctionReturn(0);
1589 }
1590 EXTERN_C_END
1591 
1592 #include "pc.h"
1593 EXTERN_C_BEGIN
1594 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1595 EXTERN_C_END
1596 
1597 #undef __FUNC__
1598 #define __FUNC__ "MatCreateMPIAIJ"
1599 /*@C
1600    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1601    (the default parallel PETSc format).  For good matrix assembly performance
1602    the user should preallocate the matrix storage by setting the parameters
1603    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1604    performance can be increased by more than a factor of 50.
1605 
1606    Collective on MPI_Comm
1607 
1608    Input Parameters:
1609 +  comm - MPI communicator
1610 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1611            This value should be the same as the local size used in creating the
1612            y vector for the matrix-vector product y = Ax.
1613 .  n - This value should be the same as the local size used in creating the
1614        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1615        calculated if N is given) For square matrices n is almost always m.
1616 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1617 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1618 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1619            (same value is used for all local rows)
1620 .  d_nnz - array containing the number of nonzeros in the various rows of the
1621            DIAGONAL portion of the local submatrix (possibly different for each row)
1622            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1623            The size of this array is equal to the number of local rows, i.e 'm'.
1624            You must leave room for the diagonal entry even if it is zero.
1625 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1626            submatrix (same value is used for all local rows).
1627 -  o_nnz - array containing the number of nonzeros in the various rows of the
1628            OFF-DIAGONAL portion of the local submatrix (possibly different for
1629            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1630            structure. The size of this array is equal to the number
1631            of local rows, i.e 'm'.
1632 
1633    Output Parameter:
1634 .  A - the matrix
1635 
1636    Notes:
1637    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1638    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1639    storage requirements for this matrix.
1640 
1641    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
1642    processor than it must be used on all processors that share the object for
1643    that argument.
1644 
1645    The AIJ format (also called the Yale sparse matrix format or
1646    compressed row storage), is fully compatible with standard Fortran 77
1647    storage.  That is, the stored row and column indices can begin at
1648    either one (as in Fortran) or zero.  See the users manual for details.
1649 
1650    The user MUST specify either the local or global matrix dimensions
1651    (possibly both).
1652 
1653    The parallel matrix is partitioned such that the first m0 rows belong to
1654    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1655    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1656 
1657    The DIAGONAL portion of the local submatrix of a processor can be defined
1658    as the submatrix which is obtained by extraction the part corresponding
1659    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1660    first row that belongs to the processor, and r2 is the last row belonging
1661    to the this processor. This is a square mxm matrix. The remaining portion
1662    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1663 
1664    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1665 
1666    By default, this format uses inodes (identical nodes) when possible.
1667    We search for consecutive rows with the same nonzero structure, thereby
1668    reusing matrix information to achieve increased efficiency.
1669 
1670    Options Database Keys:
1671 +  -mat_aij_no_inode  - Do not use inodes
1672 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1673 -  -mat_aij_oneindex - Internally use indexing starting at 1
1674         rather than 0.  Note that when calling MatSetValues(),
1675         the user still MUST index entries starting at 0!
1676 .   -mat_mpi - use the parallel matrix data structures even on one processor
1677                (defaults to using SeqBAIJ format on one processor)
1678 .   -mat_mpi - use the parallel matrix data structures even on one processor
1679                (defaults to using SeqAIJ format on one processor)
1680 
1681 
1682    Example usage:
1683 
1684    Consider the following 8x8 matrix with 34 non-zero values, that is
1685    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1686    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1687    as follows:
1688 
1689 .vb
1690             1  2  0  |  0  3  0  |  0  4
1691     Proc0   0  5  6  |  7  0  0  |  8  0
1692             9  0 10  | 11  0  0  | 12  0
1693     -------------------------------------
1694            13  0 14  | 15 16 17  |  0  0
1695     Proc1   0 18  0  | 19 20 21  |  0  0
1696             0  0  0  | 22 23  0  | 24  0
1697     -------------------------------------
1698     Proc2  25 26 27  |  0  0 28  | 29  0
1699            30  0  0  | 31 32 33  |  0 34
1700 .ve
1701 
1702    This can be represented as a collection of submatrices as:
1703 
1704 .vb
1705       A B C
1706       D E F
1707       G H I
1708 .ve
1709 
1710    Where the submatrices A,B,C are owned by proc0, D,E,F are
1711    owned by proc1, G,H,I are owned by proc2.
1712 
1713    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1714    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1715    The 'M','N' parameters are 8,8, and have the same values on all procs.
1716 
1717    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1718    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1719    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1720    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1721    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
1722    matrix, ans [DF] as another SeqAIJ matrix.
1723 
1724    When d_nz, o_nz parameters are specified, d_nz storage elements are
1725    allocated for every row of the local diagonal submatrix, and o_nz
1726    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1727    One way to choose d_nz and o_nz is to use the max nonzerors per local
1728    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1729    In this case, the values of d_nz,o_nz are:
1730 .vb
1731      proc0 : dnz = 2, o_nz = 2
1732      proc1 : dnz = 3, o_nz = 2
1733      proc2 : dnz = 1, o_nz = 4
1734 .ve
1735    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1736    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1737    for proc3. i.e we are using 12+15+10=37 storage locations to store
1738    34 values.
1739 
1740    When d_nnz, o_nnz parameters are specified, the storage is specified
1741    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1742    In the above case the values for d_nnz,o_nnz are:
1743 .vb
1744      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1745      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1746      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1747 .ve
1748    Here the space allocated is sum of all the above values i.e 34, and
1749    hence pre-allocation is perfect.
1750 
1751    Level: intermediate
1752 
1753 .keywords: matrix, aij, compressed row, sparse, parallel
1754 
1755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1756 @*/
1757 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1758 {
1759   Mat          B;
1760   Mat_MPIAIJ   *b;
1761   int          ierr, i,size,flag1 = 0, flag2 = 0;
1762 
1763   PetscFunctionBegin;
1764 
1765   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1766   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1767   if (d_nnz) {
1768     for (i=0; i<m; i++) {
1769       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1770     }
1771   }
1772   if (o_nnz) {
1773     for (i=0; i<m; i++) {
1774       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1775     }
1776   }
1777 
1778   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1779   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1780   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1781   if (!flag1 && !flag2 && size == 1) {
1782     if (M == PETSC_DECIDE) M = m;
1783     if (N == PETSC_DECIDE) N = n;
1784     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1785     PetscFunctionReturn(0);
1786   }
1787 
1788   *A = 0;
1789   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1790   PLogObjectCreate(B);
1791   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1792   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1793   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1794   B->ops->destroy = MatDestroy_MPIAIJ;
1795   B->ops->view    = MatView_MPIAIJ;
1796   B->factor       = 0;
1797   B->assembled    = PETSC_FALSE;
1798   B->mapping      = 0;
1799 
1800   B->insertmode      = NOT_SET_VALUES;
1801   b->size            = size;
1802   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1803 
1804   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1805     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1806   }
1807 
1808   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1809   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1810   b->m = m; B->m = m;
1811   b->n = n; B->n = n;
1812   b->N = N; B->N = N;
1813   b->M = M; B->M = M;
1814 
1815   /* the information in the maps duplicates the information computed below, eventually
1816      we should remove the duplicate information that is not contained in the maps */
1817   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1818   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1819 
1820   /* build local table of row and column ownerships */
1821   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1822   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1823   b->cowners = b->rowners + b->size + 2;
1824   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1825   b->rowners[0] = 0;
1826   for ( i=2; i<=b->size; i++ ) {
1827     b->rowners[i] += b->rowners[i-1];
1828   }
1829   b->rstart = b->rowners[b->rank];
1830   b->rend   = b->rowners[b->rank+1];
1831   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1832   b->cowners[0] = 0;
1833   for ( i=2; i<=b->size; i++ ) {
1834     b->cowners[i] += b->cowners[i-1];
1835   }
1836   b->cstart = b->cowners[b->rank];
1837   b->cend   = b->cowners[b->rank+1];
1838 
1839   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1840   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1841   PLogObjectParent(B,b->A);
1842   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1843   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1844   PLogObjectParent(B,b->B);
1845 
1846   /* build cache for off array entries formed */
1847   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1848   b->donotstash  = 0;
1849   b->colmap      = 0;
1850   b->garray      = 0;
1851   b->roworiented = 1;
1852 
1853   /* stuff used for matrix vector multiply */
1854   b->lvec      = 0;
1855   b->Mvctx     = 0;
1856 
1857   /* stuff for MatGetRow() */
1858   b->rowindices   = 0;
1859   b->rowvalues    = 0;
1860   b->getrowactive = PETSC_FALSE;
1861 
1862   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1863                                      "MatStoreValues_MPIAIJ",
1864                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1865   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1866                                      "MatRetrieveValues_MPIAIJ",
1867                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1868   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1869                                      "MatGetDiagonalBlock_MPIAIJ",
1870                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1871   *A = B;
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNC__
1876 #define __FUNC__ "MatDuplicate_MPIAIJ"
1877 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1878 {
1879   Mat        mat;
1880   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1881   int        ierr, len=0, flg;
1882 
1883   PetscFunctionBegin;
1884   *newmat       = 0;
1885   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1886   PLogObjectCreate(mat);
1887   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1888   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1889   mat->ops->destroy = MatDestroy_MPIAIJ;
1890   mat->ops->view    = MatView_MPIAIJ;
1891   mat->factor       = matin->factor;
1892   mat->assembled    = PETSC_TRUE;
1893   mat->insertmode   = NOT_SET_VALUES;
1894 
1895   a->m = mat->m   = oldmat->m;
1896   a->n = mat->n   = oldmat->n;
1897   a->M = mat->M   = oldmat->M;
1898   a->N = mat->N   = oldmat->N;
1899 
1900   a->rstart       = oldmat->rstart;
1901   a->rend         = oldmat->rend;
1902   a->cstart       = oldmat->cstart;
1903   a->cend         = oldmat->cend;
1904   a->size         = oldmat->size;
1905   a->rank         = oldmat->rank;
1906   a->donotstash   = oldmat->donotstash;
1907   a->roworiented  = oldmat->roworiented;
1908   a->rowindices   = 0;
1909   a->rowvalues    = 0;
1910   a->getrowactive = PETSC_FALSE;
1911 
1912   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1913   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1914   a->cowners = a->rowners + a->size + 2;
1915   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1916   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1917   if (oldmat->colmap) {
1918 #if defined (PETSC_USE_CTABLE)
1919     ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1920 #else
1921     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1922     PLogObjectMemory(mat,(a->N)*sizeof(int));
1923     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1924 #endif
1925   } else a->colmap = 0;
1926   if (oldmat->garray) {
1927     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1928     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1929     PLogObjectMemory(mat,len*sizeof(int));
1930     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1931   } else a->garray = 0;
1932 
1933   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1934   PLogObjectParent(mat,a->lvec);
1935   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1936   PLogObjectParent(mat,a->Mvctx);
1937   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1938   PLogObjectParent(mat,a->A);
1939   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1940   PLogObjectParent(mat,a->B);
1941   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1942   if (flg) {
1943     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1944   }
1945   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1946   *newmat = mat;
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #include "sys.h"
1951 
1952 #undef __FUNC__
1953 #define __FUNC__ "MatLoad_MPIAIJ"
1954 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1955 {
1956   Mat          A;
1957   Scalar       *vals,*svals;
1958   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1959   MPI_Status   status;
1960   int          i, nz, ierr, j,rstart, rend, fd;
1961   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1962   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1963   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1964 
1965   PetscFunctionBegin;
1966   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1967   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1968   if (!rank) {
1969     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1970     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1971     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1972     if (header[3] < 0) {
1973       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1974     }
1975   }
1976 
1977   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1978   M = header[1]; N = header[2];
1979   /* determine ownership of all rows */
1980   m = M/size + ((M % size) > rank);
1981   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1982   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1983   rowners[0] = 0;
1984   for ( i=2; i<=size; i++ ) {
1985     rowners[i] += rowners[i-1];
1986   }
1987   rstart = rowners[rank];
1988   rend   = rowners[rank+1];
1989 
1990   /* distribute row lengths to all processors */
1991   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1992   offlens = ourlens + (rend-rstart);
1993   if (!rank) {
1994     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1995     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1996     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1997     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1998     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1999     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2000   } else {
2001     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2002   }
2003 
2004   if (!rank) {
2005     /* calculate the number of nonzeros on each processor */
2006     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2007     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2008     for ( i=0; i<size; i++ ) {
2009       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2010         procsnz[i] += rowlengths[j];
2011       }
2012     }
2013     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2014 
2015     /* determine max buffer needed and allocate it */
2016     maxnz = 0;
2017     for ( i=0; i<size; i++ ) {
2018       maxnz = PetscMax(maxnz,procsnz[i]);
2019     }
2020     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2021 
2022     /* read in my part of the matrix column indices  */
2023     nz     = procsnz[0];
2024     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2025     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2026 
2027     /* read in every one elses and ship off */
2028     for ( i=1; i<size; i++ ) {
2029       nz   = procsnz[i];
2030       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2031       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2032     }
2033     ierr = PetscFree(cols);CHKERRQ(ierr);
2034   } else {
2035     /* determine buffer space needed for message */
2036     nz = 0;
2037     for ( i=0; i<m; i++ ) {
2038       nz += ourlens[i];
2039     }
2040     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2041 
2042     /* receive message of column indices*/
2043     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2044     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2045     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2046   }
2047 
2048   /* determine column ownership if matrix is not square */
2049   if (N != M) {
2050     n      = N/size + ((N % size) > rank);
2051     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2052     cstart = cend - n;
2053   } else {
2054     cstart = rstart;
2055     cend   = rend;
2056     n      = cend - cstart;
2057   }
2058 
2059   /* loop over local rows, determining number of off diagonal entries */
2060   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2061   jj = 0;
2062   for ( i=0; i<m; i++ ) {
2063     for ( j=0; j<ourlens[i]; j++ ) {
2064       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2065       jj++;
2066     }
2067   }
2068 
2069   /* create our matrix */
2070   for ( i=0; i<m; i++ ) {
2071     ourlens[i] -= offlens[i];
2072   }
2073   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2074   A = *newmat;
2075   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2076   for ( i=0; i<m; i++ ) {
2077     ourlens[i] += offlens[i];
2078   }
2079 
2080   if (!rank) {
2081     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2082 
2083     /* read in my part of the matrix numerical values  */
2084     nz   = procsnz[0];
2085     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2086 
2087     /* insert into matrix */
2088     jj      = rstart;
2089     smycols = mycols;
2090     svals   = vals;
2091     for ( i=0; i<m; i++ ) {
2092       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2093       smycols += ourlens[i];
2094       svals   += ourlens[i];
2095       jj++;
2096     }
2097 
2098     /* read in other processors and ship out */
2099     for ( i=1; i<size; i++ ) {
2100       nz   = procsnz[i];
2101       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2102       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2103     }
2104     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2105   } else {
2106     /* receive numeric values */
2107     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2108 
2109     /* receive message of values*/
2110     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2111     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2112     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2113 
2114     /* insert into matrix */
2115     jj      = rstart;
2116     smycols = mycols;
2117     svals   = vals;
2118     for ( i=0; i<m; i++ ) {
2119       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2120       smycols += ourlens[i];
2121       svals   += ourlens[i];
2122       jj++;
2123     }
2124   }
2125   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2126   ierr = PetscFree(vals);CHKERRQ(ierr);
2127   ierr = PetscFree(mycols);CHKERRQ(ierr);
2128   ierr = PetscFree(rowners);CHKERRQ(ierr);
2129 
2130   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2131   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 #undef __FUNC__
2136 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2137 /*
2138     Not great since it makes two copies of the submatrix, first an SeqAIJ
2139   in local and then by concatenating the local matrices the end result.
2140   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2141 */
2142 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2143 {
2144   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2145   Mat        *local,M, Mreuse;
2146   Scalar     *vwork,*aa;
2147   MPI_Comm   comm = mat->comm;
2148   Mat_SeqAIJ *aij;
2149   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2150 
2151   PetscFunctionBegin;
2152   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2153   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2154 
2155   if (call ==  MAT_REUSE_MATRIX) {
2156     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2157     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2158     local = &Mreuse;
2159     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2160   } else {
2161     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2162     Mreuse = *local;
2163     ierr = PetscFree(local);CHKERRQ(ierr);
2164   }
2165 
2166   /*
2167       m - number of local rows
2168       n - number of columns (same on all processors)
2169       rstart - first row in new global matrix generated
2170   */
2171   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2172   if (call == MAT_INITIAL_MATRIX) {
2173     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2174     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2175     ii  = aij->i;
2176     jj  = aij->j;
2177 
2178     /*
2179         Determine the number of non-zeros in the diagonal and off-diagonal
2180         portions of the matrix in order to do correct preallocation
2181     */
2182 
2183     /* first get start and end of "diagonal" columns */
2184     if (csize == PETSC_DECIDE) {
2185       nlocal = n/size + ((n % size) > rank);
2186     } else {
2187       nlocal = csize;
2188     }
2189     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2190     rstart = rend - nlocal;
2191     if (rank == size - 1 && rend != n) {
2192       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2193     }
2194 
2195     /* next, compute all the lengths */
2196     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2197     olens = dlens + m;
2198     for ( i=0; i<m; i++ ) {
2199       jend = ii[i+1] - ii[i];
2200       olen = 0;
2201       dlen = 0;
2202       for ( j=0; j<jend; j++ ) {
2203         if ( *jj < rstart || *jj >= rend) olen++;
2204         else dlen++;
2205         jj++;
2206       }
2207       olens[i] = olen;
2208       dlens[i] = dlen;
2209     }
2210     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2211     ierr = PetscFree(dlens);CHKERRQ(ierr);
2212   } else {
2213     int ml,nl;
2214 
2215     M = *newmat;
2216     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2217     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2218     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2219     /*
2220          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2221        rather than the slower MatSetValues().
2222     */
2223     M->was_assembled = PETSC_TRUE;
2224     M->assembled     = PETSC_FALSE;
2225   }
2226   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2227   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2228   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2229   ii  = aij->i;
2230   jj  = aij->j;
2231   aa  = aij->a;
2232   for (i=0; i<m; i++) {
2233     row   = rstart + i;
2234     nz    = ii[i+1] - ii[i];
2235     cwork = jj;     jj += nz;
2236     vwork = aa;     aa += nz;
2237     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2238   }
2239 
2240   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2241   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2242   *newmat = M;
2243 
2244   /* save submatrix used in processor for next request */
2245   if (call ==  MAT_INITIAL_MATRIX) {
2246     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2247     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2248   }
2249 
2250   PetscFunctionReturn(0);
2251 }
2252