xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a3b388050d4e8a4c38128e174a7572fa24ee1ba5)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.296 1999/06/10 16:08:15 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local column number in the off-diagonal part (B) of the
18 local storage of the matrix.  This is done in a non-scalable way, since
19 the length of colmap equals the global number of columns.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for ( i=0; i<n; i++ ){
33     ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
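/*
   Illustrative example (a sketch, not part of the original logic): suppose the
   off-diagonal block B has garray = {3,7,12}, i.e. its local columns 0,1,2
   correspond to global columns 3,7,12.  After CreateColmap_MPIAIJ_Private()
   the (non-CTABLE) colmap satisfies

       colmap[3] = 1,  colmap[7] = 2,  colmap[12] = 3,

   and every other entry is 0, so a global column g is located in B with

       col = aij->colmap[g] - 1;     (col < 0 means g is not present in B)

   which is exactly the lookup used by MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below; the CTABLE variant stores the same
   g+1 -> i+1 pairs in a table instead of a dense array.
*/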
43 
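/*
   The two macros below are hand-inlined versions of the row-insertion logic of
   MatSetValues_SeqAIJ(), specialized for the diagonal block (A) and the
   off-diagonal block (B).  For a given (row,col) they locate the column in the
   sorted row (a short binary search followed by a linear scan), add to or
   overwrite the value if the location already exists, and otherwise insert it,
   reallocating the row storage with CHUNKSIZE extra nonzeros when the
   preallocated space is exhausted (unless the nonew option forbids new
   nonzero locations).
*/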
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col1) high = t; \
56       else             low  = t; \
57     } \
58       for ( _i=low; _i<nrow; _i++ ) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
78         new_j   = (int *) (new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
83         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col1) high = t; \
130       else             low  = t; \
131     } \
132       for ( _i=low; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = 1; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for ( ii=N; ii>=_i; ii-- ) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNC__
194 #define __FUNC__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
198   Scalar     value;
199   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
200   int        cstart = aij->cstart, cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some variables required by the macros above */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
206   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208 
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
211   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for ( i=0; i<m; i++ ) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for ( j=0; j<n; j++ ) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233         else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ *) B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
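/*
   Typical usage from application code (an illustrative sketch only):

      int    row,cols[2];
      Scalar vals[2];
      ...
      ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ...
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Values destined for locally owned rows are inserted immediately into the
   diagonal (A) or off-diagonal (B) block above; values for rows owned by
   other processors are placed in mat->stash and only shipped to their owners
   during MatAssemblyBegin/End_MPIAIJ().
*/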
275 
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (PETSC_USE_CTABLE)
301           ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
302           col --;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
322 {
323   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
324   int         ierr,nstash,reallocs;
325   InsertMode  addv;
326 
327   PetscFunctionBegin;
328   if (aij->donotstash) {
329     PetscFunctionReturn(0);
330   }
331 
332   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
333   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
334   if (addv == (ADD_VALUES|INSERT_VALUES)) {
335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
336   }
337   mat->insertmode = addv; /* in case this processor had no cache */
338 
339   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
340   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
341   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
342   PetscFunctionReturn(0);
343 }
344 
345 
346 #undef __FUNC__
347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
349 {
350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
351   int         i,j,rstart,ncols,n,ierr,flg;
352   int         *row,*col,other_disassembled;
353   Scalar      *val;
354   InsertMode  addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for ( i=0; i<n; ) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled; if so we must
379      also disassemble ourselves, so that we may reassemble. */
380   /*
381      if the nonzero structure of the submatrix B cannot change then we know
382      that no processor disassembled, and thus we can skip this step
383   */
384   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388     }
389   }
390 
391   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
392     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
393   }
394   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
396 
397   if (aij->rowvalues) {
398     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
399     aij->rowvalues = 0;
400   }
401   PetscFunctionReturn(0);
402 }
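/*
   Note on the MPI_PROD reduction above: each processor contributes its
   mat->was_assembled flag (0 or 1), so the product is 1 only if every
   processor was already assembled.  Hence the test

       mat->was_assembled && !other_disassembled

   means some other processor has disassembled (reset) its off-diagonal block,
   and this processor must call DisAssemble_MPIAIJ() as well so that the
   subsequent collective setup and assembly of B stay consistent.
*/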
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
418 #define __FUNC__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
422   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
423   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431 
432   PetscFunctionBegin;
433   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
434   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
435 
436   /*  first count number of contributors to each processor */
437   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
438   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
439   procs  = nprocs + size;
440   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
441   for ( i=0; i<N; i++ ) {
442     idx = rows[i];
443     found = 0;
444     for ( j=0; j<size; j++ ) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
450   }
451   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
452 
453   /* inform other processors of number of messages and max length*/
454   work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
455   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
456   nrecvs = work[rank];
457   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
458   nmax = work[rank];
459   ierr = PetscFree(work);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
463   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
464   for ( i=0; i<nrecvs; i++ ) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
473   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
474   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
475   starts[0] = 0;
476   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
477   for ( i=0; i<N; i++ ) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ISRestoreIndices(is,&rows);
481 
482   starts[0] = 0;
483   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
484   count = 0;
485   for ( i=0; i<size; i++ ) {
486     if (procs[i]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]  = n;
504     slen += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
511   count = 0;
512   for ( i=0; i<nrecvs; i++ ) {
513     values = rvalues + i*nmax;
514     for ( j=0; j<lens[i]; j++ ) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal, we use separate
530      code so that MatSetValues() is not called for each diagonal entry,
531      which would allocate new memory, causing lots of mallocs and slowing things down.
532 
533        Contributed by: Matthew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for ( i = 0; i < slen; i++ ) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
560     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
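/*
   Communication pattern used in MatZeroRows_MPIAIJ() above (a sketch):
     1) for every row index in "is" determine the owning processor from the
        ownership ranges in owners[] and count how many indices go to each
        owner;
     2) two MPI_Allreduce() calls tell each processor how many messages it
        will receive (nrecvs) and the maximum message length (nmax), so the
        receives can be posted before any sends;
     3) the row indices are packed by owner, exchanged with
        MPI_Isend()/MPI_Irecv(), converted to local numbering by subtracting
        owners[rank], and finally zeroed through MatZeroRows() on the local
        blocks.
*/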
568 
569 #undef __FUNC__
570 #define __FUNC__ "MatMult_MPIAIJ"
571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574   int        ierr,nt;
575 
576   PetscFunctionBegin;
577   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
578   if (nt != a->n) {
579     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
580   }
581   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
582   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
583   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
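/*
   The parallel multiply above is the splitting  y = A*x_local + B*x_ghost:
   the scatter fills a->lvec with the off-processor entries of x needed by the
   off-diagonal block B, the diagonal block A is applied while that
   communication is in flight, and B then adds its contribution from the ghost
   values.  MatMultAdd_MPIAIJ() below is identical except that it accumulates
   the result into zz.
*/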
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatMultAdd_MPIAIJ"
590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
591 {
592   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
593   int        ierr;
594 
595   PetscFunctionBegin;
596   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
598   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatMultTrans_MPIAIJ"
605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
606 {
607   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
608   int        ierr;
609 
610   PetscFunctionBegin;
611   /* do nondiagonal part */
612   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
613   /* send it on its way */
614   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
615   /* do local part */
616   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
617   /* receive remote parts: note this assumes the values are not actually */
618   /* inserted into yy until VecScatterEnd() below, which is true for this */
619   /* implementation but may not always be true. */
620   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNC__
625 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
627 {
628   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
629   int        ierr;
630 
631   PetscFunctionBegin;
632   /* do nondiagonal part */
633   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
634   /* send it on its way */
635   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
636   /* do local part */
637   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
638   /* receive remote parts: note this assumes the values are not actually */
639   /* inserted into zz until VecScatterEnd() below, which is true for this */
640   /* implementation but may not always be true. */
641   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
642   PetscFunctionReturn(0);
643 }
644 
645 /*
646   This only works correctly for square matrices where the subblock A->A is the
647    diagonal block
648 */
649 #undef __FUNC__
650 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
652 {
653   int        ierr;
654   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
655 
656   PetscFunctionBegin;
657   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
658   if (a->rstart != a->cstart || a->rend != a->cend) {
659     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
660   }
661   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatScale_MPIAIJ"
667 int MatScale_MPIAIJ(Scalar *aa,Mat A)
668 {
669   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
670   int        ierr;
671 
672   PetscFunctionBegin;
673   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
674   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
675   PetscFunctionReturn(0);
676 }
677 
678 #undef __FUNC__
679 #define __FUNC__ "MatDestroy_MPIAIJ"
680 int MatDestroy_MPIAIJ(Mat mat)
681 {
682   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
683   int        ierr;
684 
685   PetscFunctionBegin;
686   if (--mat->refct > 0) PetscFunctionReturn(0);
687 
688   if (mat->mapping) {
689     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
690   }
691   if (mat->bmapping) {
692     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
693   }
694   if (mat->rmap) {
695     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
696   }
697   if (mat->cmap) {
698     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
699   }
700 #if defined(PETSC_USE_LOG)
701   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
702 #endif
703   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
704   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
705   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
706   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
707 #if defined (PETSC_USE_CTABLE)
708   if (aij->colmap) TableDelete(aij->colmap);
709 #else
710   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
711 #endif
712   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
713   if (aij->lvec)   VecDestroy(aij->lvec);
714   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
715   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
716   ierr = PetscFree(aij);CHKERRQ(ierr);
717   PLogObjectDestroy(mat);
718   PetscHeaderDestroy(mat);
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNC__
723 #define __FUNC__ "MatView_MPIAIJ_Binary"
724 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
725 {
726   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
727   int         ierr;
728 
729   PetscFunctionBegin;
730   if (aij->size == 1) {
731     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
732   }
733   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNC__
738 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
739 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
740 {
741   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
742   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
743   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
744   int         size = aij->size;
745   FILE        *fd;
746   ViewerType  vtype;
747 
748   PetscFunctionBegin;
749   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
750   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
751     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
752     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
753       MatInfo info;
754       int     flg;
755       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756       ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
757       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
758       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
759       PetscSequentialPhaseBegin(mat->comm,1);
760       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
761          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
762       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
763          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
764       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
765       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
766       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
767       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
768       fflush(fd);
769       PetscSequentialPhaseEnd(mat->comm,1);
770       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
771       PetscFunctionReturn(0);
772     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
773       PetscFunctionReturn(0);
774     }
775   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
776     Draw       draw;
777     PetscTruth isnull;
778     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
779     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
780   }
781 
782   if (size == 1) {
783     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
784   } else {
785     /* assemble the entire matrix onto first processor. */
786     Mat         A;
787     Mat_SeqAIJ *Aloc;
788     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
789     Scalar      *a;
790 
791     if (!rank) {
792       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
793     } else {
794       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
795     }
796     PLogObjectParent(mat,A);
797 
798     /* copy over the A part */
799     Aloc = (Mat_SeqAIJ*) aij->A->data;
800     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
801     row = aij->rstart;
802     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
803     for ( i=0; i<m; i++ ) {
804       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
805       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
806     }
807     aj = Aloc->j;
808     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
809 
810     /* copy over the B part */
811     Aloc = (Mat_SeqAIJ*) aij->B->data;
812     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
813     row = aij->rstart;
814     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
815     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
816     for ( i=0; i<m; i++ ) {
817       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
818       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
819     }
820     ierr = PetscFree(ct);CHKERRQ(ierr);
821     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     /*
824        Everyone has to make the call that draws the matrix, since the graphics waits are
825        synchronized across all processors that share the Draw object
826     */
827     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
828       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
829     }
830     ierr = MatDestroy(A);CHKERRQ(ierr);
831   }
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNC__
836 #define __FUNC__ "MatView_MPIAIJ"
837 int MatView_MPIAIJ(Mat mat,Viewer viewer)
838 {
839   int         ierr;
840   ViewerType  vtype;
841 
842   PetscFunctionBegin;
843   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
844   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
845       PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) {
846     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
847     /*
848   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
849     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
850     */
851   } else {
852     SETERRQ(1,1,"Viewer type not supported by PETSc object");
853   }
854   PetscFunctionReturn(0);
855 }
856 
857 /*
858     This has to provide several versions.
859 
860      2) a) use only local smoothing, updating outer values only once.
861         b) local smoothing, updating outer values each inner iteration
862      3) color updating of outer values between colors.
863 */
864 #undef __FUNC__
865 #define __FUNC__ "MatRelax_MPIAIJ"
866 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
867                            double fshift,int its,Vec xx)
868 {
869   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
870   Mat        AA = mat->A, BB = mat->B;
871   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
872   Scalar     *b,*x,*xs,*ls,d,*v,sum;
873   int        ierr,*idx, *diag;
874   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
875 
876   PetscFunctionBegin;
877   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
878   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
879   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
880   xs = x + shift; /* shift by one for index start of 1 */
881   ls = ls + shift;
882   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
883   diag = A->diag;
884   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
885     if (flag & SOR_ZERO_INITIAL_GUESS) {
886       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
887       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
889       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
890       PetscFunctionReturn(0);
891     }
892     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
893     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
894     while (its--) {
895       /* go down through the rows */
896       for ( i=0; i<m; i++ ) {
897         n    = A->i[i+1] - A->i[i];
898 	PLogFlops(4*n+3);
899         idx  = A->j + A->i[i] + shift;
900         v    = A->a + A->i[i] + shift;
901         sum  = b[i];
902         SPARSEDENSEMDOT(sum,xs,v,idx,n);
903         d    = fshift + A->a[diag[i]+shift];
904         n    = B->i[i+1] - B->i[i];
905         idx  = B->j + B->i[i] + shift;
906         v    = B->a + B->i[i] + shift;
907         SPARSEDENSEMDOT(sum,ls,v,idx,n);
908         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
909       }
910       /* come up through the rows */
911       for ( i=m-1; i>-1; i-- ) {
912         n    = A->i[i+1] - A->i[i];
913         PLogFlops(4*n+3);
914         idx  = A->j + A->i[i] + shift;
915         v    = A->a + A->i[i] + shift;
916         sum  = b[i];
917         SPARSEDENSEMDOT(sum,xs,v,idx,n);
918         d    = fshift + A->a[diag[i]+shift];
919         n    = B->i[i+1] - B->i[i];
920         idx  = B->j + B->i[i] + shift;
921         v    = B->a + B->i[i] + shift;
922         SPARSEDENSEMDOT(sum,ls,v,idx,n);
923         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
924       }
925     }
926   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
927     if (flag & SOR_ZERO_INITIAL_GUESS) {
928       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
929       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
930       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
931       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
932       PetscFunctionReturn(0);
933     }
934     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
935     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
936     while (its--) {
937       for ( i=0; i<m; i++ ) {
938         n    = A->i[i+1] - A->i[i];
939 	PLogFlops(4*n+3);
940         idx  = A->j + A->i[i] + shift;
941         v    = A->a + A->i[i] + shift;
942         sum  = b[i];
943         SPARSEDENSEMDOT(sum,xs,v,idx,n);
944         d    = fshift + A->a[diag[i]+shift];
945         n    = B->i[i+1] - B->i[i];
946         idx  = B->j + B->i[i] + shift;
947         v    = B->a + B->i[i] + shift;
948         SPARSEDENSEMDOT(sum,ls,v,idx,n);
949         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
950       }
951     }
952   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
953     if (flag & SOR_ZERO_INITIAL_GUESS) {
954       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
955       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
956       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
957       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
958       PetscFunctionReturn(0);
959     }
960     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
961     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
962     while (its--) {
963       for ( i=m-1; i>-1; i-- ) {
964         n    = A->i[i+1] - A->i[i];
965 	PLogFlops(4*n+3);
966         idx  = A->j + A->i[i] + shift;
967         v    = A->a + A->i[i] + shift;
968         sum  = b[i];
969         SPARSEDENSEMDOT(sum,xs,v,idx,n);
970         d    = fshift + A->a[diag[i]+shift];
971         n    = B->i[i+1] - B->i[i];
972         idx  = B->j + B->i[i] + shift;
973         v    = B->a + B->i[i] + shift;
974         SPARSEDENSEMDOT(sum,ls,v,idx,n);
975         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
976       }
977     }
978   } else {
979     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
980   }
981   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
982   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
983   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
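/*
   The local relaxation sweeps above implement (in exact arithmetic)

       x_i <- (1-omega)*x_i + omega*(b_i - sum_{j != i} a_ij x_j)/(a_ii + fshift)

   where the row sum runs over both the diagonal block A (using x) and the
   off-diagonal block B (using the ghost values in ls).  The code first
   accumulates sum = b_i - sum_j a_ij x_j over all j via SPARSEDENSEMDOT() and
   then adds the diagonal term a_ii*x_i back in, which is algebraically the
   same thing.
*/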
986 
987 #undef __FUNC__
988 #define __FUNC__ "MatGetInfo_MPIAIJ"
989 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
990 {
991   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
992   Mat        A = mat->A, B = mat->B;
993   int        ierr;
994   double     isend[5], irecv[5];
995 
996   PetscFunctionBegin;
997   info->block_size     = 1.0;
998   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
999   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1000   isend[3] = info->memory;  isend[4] = info->mallocs;
1001   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1002   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1003   isend[3] += info->memory;  isend[4] += info->mallocs;
1004   if (flag == MAT_LOCAL) {
1005     info->nz_used      = isend[0];
1006     info->nz_allocated = isend[1];
1007     info->nz_unneeded  = isend[2];
1008     info->memory       = isend[3];
1009     info->mallocs      = isend[4];
1010   } else if (flag == MAT_GLOBAL_MAX) {
1011     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1012     info->nz_used      = irecv[0];
1013     info->nz_allocated = irecv[1];
1014     info->nz_unneeded  = irecv[2];
1015     info->memory       = irecv[3];
1016     info->mallocs      = irecv[4];
1017   } else if (flag == MAT_GLOBAL_SUM) {
1018     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1019     info->nz_used      = irecv[0];
1020     info->nz_allocated = irecv[1];
1021     info->nz_unneeded  = irecv[2];
1022     info->memory       = irecv[3];
1023     info->mallocs      = irecv[4];
1024   }
1025   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1026   info->fill_ratio_needed = 0;
1027   info->factor_mallocs    = 0;
1028   info->rows_global       = (double)mat->M;
1029   info->columns_global    = (double)mat->N;
1030   info->rows_local        = (double)mat->m;
1031   info->columns_local     = (double)mat->N;
1032 
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #undef __FUNC__
1037 #define __FUNC__ "MatSetOption_MPIAIJ"
1038 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1039 {
1040   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1041   int        ierr;
1042 
1043   PetscFunctionBegin;
1044   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1045       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1046       op == MAT_COLUMNS_UNSORTED ||
1047       op == MAT_COLUMNS_SORTED ||
1048       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1049       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1050         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1051         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1052   } else if (op == MAT_ROW_ORIENTED) {
1053     a->roworiented = 1;
1054     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1055     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1056   } else if (op == MAT_ROWS_SORTED ||
1057              op == MAT_ROWS_UNSORTED ||
1058              op == MAT_SYMMETRIC ||
1059              op == MAT_STRUCTURALLY_SYMMETRIC ||
1060              op == MAT_YES_NEW_DIAGONALS)
1061     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1062   else if (op == MAT_COLUMN_ORIENTED) {
1063     a->roworiented = 0;
1064     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1065     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1066   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1067     a->donotstash = 1;
1068   } else if (op == MAT_NO_NEW_DIAGONALS){
1069     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1070   } else {
1071     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1072   }
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNC__
1077 #define __FUNC__ "MatGetSize_MPIAIJ"
1078 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1079 {
1080   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1081 
1082   PetscFunctionBegin;
1083   if (m) *m = mat->M;
1084   if (n) *n = mat->N;
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNC__
1089 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1090 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1091 {
1092   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1093 
1094   PetscFunctionBegin;
1095   if (m) *m = mat->m;
1096   if (n) *n = mat->n;
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNC__
1101 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1102 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1103 {
1104   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1105 
1106   PetscFunctionBegin;
1107   *m = mat->rstart; *n = mat->rend;
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNC__
1112 #define __FUNC__ "MatGetRow_MPIAIJ"
1113 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1114 {
1115   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1116   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1117   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1118   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1119   int        *cmap, *idx_p;
1120 
1121   PetscFunctionBegin;
1122   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1123   mat->getrowactive = PETSC_TRUE;
1124 
1125   if (!mat->rowvalues && (idx || v)) {
1126     /*
1127         allocate enough space to hold information from the longest row.
1128     */
1129     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1130     int     max = 1,m = mat->m,tmp;
1131     for ( i=0; i<m; i++ ) {
1132       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1133       if (max < tmp) { max = tmp; }
1134     }
1135     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1136     mat->rowindices = (int *) (mat->rowvalues + max);
1137   }
1138 
1139   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1140   lrow = row - rstart;
1141 
1142   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1143   if (!v)   {pvA = 0; pvB = 0;}
1144   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1145   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1146   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1147   nztot = nzA + nzB;
1148 
1149   cmap  = mat->garray;
1150   if (v  || idx) {
1151     if (nztot) {
1152       /* Sort by increasing column numbers, assuming A and B already sorted */
1153       int imark = -1;
1154       if (v) {
1155         *v = v_p = mat->rowvalues;
1156         for ( i=0; i<nzB; i++ ) {
1157           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1158           else break;
1159         }
1160         imark = i;
1161         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1162         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1163       }
1164       if (idx) {
1165         *idx = idx_p = mat->rowindices;
1166         if (imark > -1) {
1167           for ( i=0; i<imark; i++ ) {
1168             idx_p[i] = cmap[cworkB[i]];
1169           }
1170         } else {
1171           for ( i=0; i<nzB; i++ ) {
1172             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1173             else break;
1174           }
1175           imark = i;
1176         }
1177         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1178         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1179       }
1180     } else {
1181       if (idx) *idx = 0;
1182       if (v)   *v   = 0;
1183     }
1184   }
1185   *nz = nztot;
1186   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1187   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
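/*
   Illustrative usage (a sketch only): looping over the locally owned rows,
   e.g. to compute row sums.

      int    rstart,rend,row,nz,*cols,i;
      Scalar *vals,sum;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
        sum  = 0.0;
        for ( i=0; i<nz; i++ ) sum += vals[i];
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }

   Only locally owned rows may be requested, and MatRestoreRow() must be called
   before the next MatGetRow(), since the routine reuses the single
   rowvalues/rowindices work array allocated above.
*/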
1190 
1191 #undef __FUNC__
1192 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1193 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1194 {
1195   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1196 
1197   PetscFunctionBegin;
1198   if (aij->getrowactive == PETSC_FALSE) {
1199     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1200   }
1201   aij->getrowactive = PETSC_FALSE;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNC__
1206 #define __FUNC__ "MatNorm_MPIAIJ"
1207 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1208 {
1209   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1210   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1211   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1212   double     sum = 0.0;
1213   Scalar     *v;
1214 
1215   PetscFunctionBegin;
1216   if (aij->size == 1) {
1217     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1218   } else {
1219     if (type == NORM_FROBENIUS) {
1220       v = amat->a;
1221       for (i=0; i<amat->nz; i++ ) {
1222 #if defined(PETSC_USE_COMPLEX)
1223         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1224 #else
1225         sum += (*v)*(*v); v++;
1226 #endif
1227       }
1228       v = bmat->a;
1229       for (i=0; i<bmat->nz; i++ ) {
1230 #if defined(PETSC_USE_COMPLEX)
1231         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1232 #else
1233         sum += (*v)*(*v); v++;
1234 #endif
1235       }
1236       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1237       *norm = sqrt(*norm);
1238     } else if (type == NORM_1) { /* max column norm */
1239       double *tmp, *tmp2;
1240       int    *jj, *garray = aij->garray;
1241       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1242       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1243       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1244       *norm = 0.0;
1245       v = amat->a; jj = amat->j;
1246       for ( j=0; j<amat->nz; j++ ) {
1247         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1248       }
1249       v = bmat->a; jj = bmat->j;
1250       for ( j=0; j<bmat->nz; j++ ) {
1251         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1252       }
1253       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1254       for ( j=0; j<aij->N; j++ ) {
1255         if (tmp2[j] > *norm) *norm = tmp2[j];
1256       }
1257       ierr = PetscFree(tmp);CHKERRQ(ierr);
1258       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1259     } else if (type == NORM_INFINITY) { /* max row norm */
1260       double ntemp = 0.0;
1261       for ( j=0; j<amat->m; j++ ) {
1262         v = amat->a + amat->i[j] + shift;
1263         sum = 0.0;
1264         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1265           sum += PetscAbsScalar(*v); v++;
1266         }
1267         v = bmat->a + bmat->i[j] + shift;
1268         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1269           sum += PetscAbsScalar(*v); v++;
1270         }
1271         if (sum > ntemp) ntemp = sum;
1272       }
1273       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1274     } else {
1275       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1276     }
1277   }
1278   PetscFunctionReturn(0);
1279 }
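/*
   Norms computed above:
     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_ij |a_ij|^2)   (local sums of squares, MPI_SUM, then sqrt)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|      (per-column absolute sums, MPI_SUM, then max)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|      (per-row absolute sums, then MPI_MAX)
   The 2-norm is not supported, since it would require an eigenvalue (SVD)
   computation rather than a simple reduction.
*/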
1280 
1281 #undef __FUNC__
1282 #define __FUNC__ "MatTranspose_MPIAIJ"
1283 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1284 {
1285   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1286   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1287   int        ierr,shift = Aloc->indexshift;
1288   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1289   Mat        B;
1290   Scalar     *array;
1291 
1292   PetscFunctionBegin;
1293   if (matout == PETSC_NULL && M != N) {
1294     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1295   }
1296 
1297   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1298 
1299   /* copy over the A part */
1300   Aloc = (Mat_SeqAIJ*) a->A->data;
1301   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1302   row = a->rstart;
1303   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1304   for ( i=0; i<m; i++ ) {
1305     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1306     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1307   }
1308   aj = Aloc->j;
1309   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1310 
1311   /* copy over the B part */
1312   Aloc = (Mat_SeqAIJ*) a->B->data;
1313   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1314   row = a->rstart;
1315   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1316   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1317   for ( i=0; i<m; i++ ) {
1318     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1319     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1320   }
1321   ierr = PetscFree(ct);CHKERRQ(ierr);
1322   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1323   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1324   if (matout != PETSC_NULL) {
1325     *matout = B;
1326   } else {
1327     PetscOps       *Abops;
1328     struct _MatOps *Aops;
1329 
1330     /* This isn't really an in-place transpose, but we free the data structures of a and then take over those of B */
1331     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1332     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1333     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1334 #if defined (PETSC_USE_CTABLE)
1335     if (a->colmap) TableDelete(a->colmap);
1336 #else
1337     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1338 #endif
1339     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1340     if (a->lvec) VecDestroy(a->lvec);
1341     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1342     ierr = PetscFree(a);CHKERRQ(ierr);
1343 
1344     /*
1345        This is horrible, horrible code. We need to keep the
1346       A pointers for the bops and ops but copy everything
1347       else from B.
1348     */
1349     Abops   = A->bops;
1350     Aops    = A->ops;
1351     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1352     A->bops = Abops;
1353     A->ops  = Aops;
1354     PetscHeaderDestroy(B);
1355   }
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNC__
1360 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1361 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1362 {
1363   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1364   Mat a = aij->A, b = aij->B;
1365   int ierr,s1,s2,s3;
1366 
1367   PetscFunctionBegin;
1368   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1369   if (rr) {
1370     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1371     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1372     /* Overlap communication with computation. */
1373     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1374   }
1375   if (ll) {
1376     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1377     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1378     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1379   }
1380   /* scale  the diagonal block */
1381   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1382 
1383   if (rr) {
1384     /* Do a scatter end and then right scale the off-diagonal block */
1385     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1386     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1387   }
1388 
1389   PetscFunctionReturn(0);
1390 }
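/*
   The scaling above is applied blockwise: the diagonal block receives
   diag(ll)*A*diag(rr) directly, while the off-diagonal block first receives
   the left scaling diag(ll)*B and, once the scatter of rr into the ghost
   vector lvec completes, the right scaling by the ghosted entries of rr.
   Starting the scatter before the local work overlaps communication with
   computation.
*/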
1391 
1392 
1393 #undef __FUNC__
1394 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1395 int MatPrintHelp_MPIAIJ(Mat A)
1396 {
1397   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1398   int        ierr;
1399 
1400   PetscFunctionBegin;
1401   if (!a->rank) {
1402     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNC__
1408 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1409 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1410 {
1411   PetscFunctionBegin;
1412   *bs = 1;
1413   PetscFunctionReturn(0);
1414 }
1415 #undef __FUNC__
1416 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1417 int MatSetUnfactored_MPIAIJ(Mat A)
1418 {
1419   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1420   int        ierr;
1421 
1422   PetscFunctionBegin;
1423   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNC__
1428 #define __FUNC__ "MatEqual_MPIAIJ"
1429 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1430 {
1431   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1432   Mat        a, b, c, d;
1433   PetscTruth flg;
1434   int        ierr;
1435 
1436   PetscFunctionBegin;
1437   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1438   a = matA->A; b = matA->B;
1439   c = matB->A; d = matB->B;
1440 
1441   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1442   if (flg == PETSC_TRUE) {
1443     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1444   }
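  /* the matrices are equal only if every process found both its DIAGONAL and
     OFF-DIAGONAL blocks equal, hence the logical AND reduction */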
1445   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNC__
1450 #define __FUNC__ "MatCopy_MPIAIJ"
1451 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1452 {
1453   int        ierr;
1454   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1455   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1456 
1457   PetscFunctionBegin;
1458   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1459     /* because of the column compression in the off-processor part of the matrix a->B,
1460        the number of columns in a->B and b->B may be different, hence we cannot call
1461        MatCopy() directly on the two parts. If need be, we could provide a more
1462        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1463        and then copying the submatrices */
1464     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1465   } else {
1466     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1467     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1468   }
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1473 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1474 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1475 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1476 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1477 
1478 /* -------------------------------------------------------------------*/
1479 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1480        MatGetRow_MPIAIJ,
1481        MatRestoreRow_MPIAIJ,
1482        MatMult_MPIAIJ,
1483        MatMultAdd_MPIAIJ,
1484        MatMultTrans_MPIAIJ,
1485        MatMultTransAdd_MPIAIJ,
1486        0,
1487        0,
1488        0,
1489        0,
1490        0,
1491        0,
1492        MatRelax_MPIAIJ,
1493        MatTranspose_MPIAIJ,
1494        MatGetInfo_MPIAIJ,
1495        MatEqual_MPIAIJ,
1496        MatGetDiagonal_MPIAIJ,
1497        MatDiagonalScale_MPIAIJ,
1498        MatNorm_MPIAIJ,
1499        MatAssemblyBegin_MPIAIJ,
1500        MatAssemblyEnd_MPIAIJ,
1501        0,
1502        MatSetOption_MPIAIJ,
1503        MatZeroEntries_MPIAIJ,
1504        MatZeroRows_MPIAIJ,
1505        0,
1506        0,
1507        0,
1508        0,
1509        MatGetSize_MPIAIJ,
1510        MatGetLocalSize_MPIAIJ,
1511        MatGetOwnershipRange_MPIAIJ,
1512        0,
1513        0,
1514        0,
1515        0,
1516        MatDuplicate_MPIAIJ,
1517        0,
1518        0,
1519        0,
1520        0,
1521        0,
1522        MatGetSubMatrices_MPIAIJ,
1523        MatIncreaseOverlap_MPIAIJ,
1524        MatGetValues_MPIAIJ,
1525        MatCopy_MPIAIJ,
1526        MatPrintHelp_MPIAIJ,
1527        MatScale_MPIAIJ,
1528        0,
1529        0,
1530        0,
1531        MatGetBlockSize_MPIAIJ,
1532        0,
1533        0,
1534        0,
1535        0,
1536        MatFDColoringCreate_MPIAIJ,
1537        0,
1538        MatSetUnfactored_MPIAIJ,
1539        0,
1540        0,
1541        MatGetSubMatrix_MPIAIJ,
1542        0,
1543        0,
1544        MatGetMaps_Petsc};
1545 
1546 /* ----------------------------------------------------------------------------------------*/
1547 
1548 EXTERN_C_BEGIN
1549 #undef __FUNC__
1550 #define __FUNC__ "MatStoreValues_MPIAIJ"
1551 int MatStoreValues_MPIAIJ(Mat mat)
1552 {
1553   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1554   int        ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1558   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 EXTERN_C_END
1562 
1563 EXTERN_C_BEGIN
1564 #undef __FUNC__
1565 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1566 int MatRetrieveValues_MPIAIJ(Mat mat)
1567 {
1568   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1569   int        ierr;
1570 
1571   PetscFunctionBegin;
1572   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1573   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1574   PetscFunctionReturn(0);
1575 }
1576 EXTERN_C_END
1577 
1578 #include "pc.h"
1579 EXTERN_C_BEGIN
1580 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1581 EXTERN_C_END
1582 
1583 #undef __FUNC__
1584 #define __FUNC__ "MatCreateMPIAIJ"
1585 /*@C
1586    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1587    (the default parallel PETSc format).  For good matrix assembly performance
1588    the user should preallocate the matrix storage by setting the parameters
1589    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1590    performance can be increased by more than a factor of 50.
1591 
1592    Collective on MPI_Comm
1593 
1594    Input Parameters:
1595 +  comm - MPI communicator
1596 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1597            This value should be the same as the local size used in creating the
1598            y vector for the matrix-vector product y = Ax.
1599 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1600        This value should be the same as the local size used in creating the
1601        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1602 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1603 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1604 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1605            (same value is used for all local rows)
1606 .  d_nnz - array containing the number of nonzeros in the various rows of the
1607            DIAGONAL portion of the local submatrix (possibly different for each row)
1608            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1609            The size of this array is equal to the number of local rows, i.e. 'm'.
1610            You must leave room for the diagonal entry even if it is zero.
1611 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1612            submatrix (same value is used for all local rows).
1613 -  o_nnz - array containing the number of nonzeros in the various rows of the
1614            OFF-DIAGONAL portion of the local submatrix (possibly different for
1615            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1616            structure. The size of this array is equal to the number
1617            of local rows, i.e. 'm'.
1618 
1619    Output Parameter:
1620 .  A - the matrix
1621 
1622    Notes:
1623    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1624    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1625    storage requirements for this matrix.
1626 
1627    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1628    processor, then it must be used on all processors that share the object for
1629    that argument.
1630 
1631    The AIJ format (also called the Yale sparse matrix format or
1632    compressed row storage), is fully compatible with standard Fortran 77
1633    storage.  That is, the stored row and column indices can begin at
1634    either one (as in Fortran) or zero.  See the users manual for details.
1635 
1636    The user MUST specify either the local or global matrix dimensions
1637    (possibly both).
1638 
1639    The parallel matrix is partitioned such that the first m0 rows belong to
1640    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1641    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
1642 
1643    The DIAGONAL portion of the local submatrix of a processor can be defined
1644    as the submatrix which is obtained by extracting the part corresponding
1645    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1646    first row that belongs to the processor, and r2 is the last row belonging
1647    to this processor. This is a square mxm matrix. The remaining portion
1648    of the local submatrix (of size m x (N-m)) constitutes the OFF-DIAGONAL portion.
1649 
1650    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1651 
1652    By default, this format uses inodes (identical nodes) when possible.
1653    We search for consecutive rows with the same nonzero structure, thereby
1654    reusing matrix information to achieve increased efficiency.
1655 
1656    Options Database Keys:
1657 +  -mat_aij_no_inode  - Do not use inodes
1658 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1659 .  -mat_aij_oneindex - Internally use indexing starting at 1
1660         rather than 0.  Note that when calling MatSetValues(),
1661         the user still MUST index entries starting at 0!
1662 -  -mat_mpi - use the parallel matrix data structures even on one processor
1663                (defaults to using SeqAIJ format on one processor)
1666 
1667 
1668    Example usage:
1669 
1670    Consider the following 8x8 matrix with 34 non-zero values, that is
1671    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1672    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1673    as follows:
1674 
1675 .vb
1676             1  2  0  |  0  3  0  |  0  4
1677     Proc0   0  5  6  |  7  0  0  |  8  0
1678             9  0 10  | 11  0  0  | 12  0
1679     -------------------------------------
1680            13  0 14  | 15 16 17  |  0  0
1681     Proc1   0 18  0  | 19 20 21  |  0  0
1682             0  0  0  | 22 23  0  | 24  0
1683     -------------------------------------
1684     Proc2  25 26 27  |  0  0 28  | 29  0
1685            30  0  0  | 31 32 33  |  0 34
1686 .ve
1687 
1688    This can be represented as a collection of submatrices as:
1689 
1690 .vb
1691       A B C
1692       D E F
1693       G H I
1694 .ve
1695 
1696    Where the submatrices A,B,C are owned by proc0, D,E,F are
1697    owned by proc1, G,H,I are owned by proc2.
1698 
1699    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1700    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1701    The 'M','N' parameters are 8,8, and have the same values on all procs.
1702 
1703    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1704    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1705    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1706    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1707    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1708    matrix, and [DF] as another SeqAIJ matrix.
1709 
1710    When the d_nz, o_nz parameters are specified, d_nz storage elements are
1711    allocated for every row of the local DIAGONAL submatrix, and o_nz
1712    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1713    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
1714    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
1715    In this case, the values of d_nz,o_nz are:
1716 .vb
1717      proc0 : d_nz = 2, o_nz = 2
1718      proc1 : d_nz = 3, o_nz = 2
1719      proc2 : d_nz = 1, o_nz = 4
1720 .ve
1721    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1722    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
1723    for proc2, i.e. we are using 12+15+10=37 storage locations to store
1724    34 values.
1725 
1726    When d_nnz, o_nnz parameters are specified, the storage is specified
1727    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
1728    In the above case the values for d_nnz,o_nnz are:
1729 .vb
1730      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1731      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1732      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1733 .ve
1734    Here the space allocated is the sum of all the above values, i.e. 34, and
1735    hence the preallocation is perfect.
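
   As a minimal calling sketch for the layout above (not part of the original
   example; error checking and the surrounding program are omitted, and the
   d_nnz/o_nnz arrays are simply the per-row values listed above), the call on
   proc1 might look like:

.vb
     Mat A;
     int d_nnz[] = {3,3,2};   /* nonzeros per local row, DIAGONAL block     */
     int o_nnz[] = {2,1,1};   /* nonzeros per local row, OFF-DIAGONAL block */

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual */
.ve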
1736 
1737    Level: intermediate
1738 
1739 .keywords: matrix, aij, compressed row, sparse, parallel
1740 
1741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1742 @*/
1743 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1744 {
1745   Mat          B;
1746   Mat_MPIAIJ   *b;
1747   int          ierr, i,size,flag1 = 0, flag2 = 0;
1748 
1749   PetscFunctionBegin;
1750   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1751   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1752   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1753   if (!flag1 && !flag2 && size == 1) {
1754     if (M == PETSC_DECIDE) M = m;
1755     if (N == PETSC_DECIDE) N = n;
1756     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1757     PetscFunctionReturn(0);
1758   }
1759 
1760   *A = 0;
1761   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1762   PLogObjectCreate(B);
1763   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1764   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1765   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1766   B->ops->destroy = MatDestroy_MPIAIJ;
1767   B->ops->view    = MatView_MPIAIJ;
1768   B->factor       = 0;
1769   B->assembled    = PETSC_FALSE;
1770   B->mapping      = 0;
1771 
1772   B->insertmode      = NOT_SET_VALUES;
1773   b->size            = size;
1774   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1775 
1776   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1777     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1778   }
1779 
1780   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1781   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1782   b->m = m; B->m = m;
1783   b->n = n; B->n = n;
1784   b->N = N; B->N = N;
1785   b->M = M; B->M = M;
1786 
1787   /* the information in the maps duplicates the information computed below, eventually
1788      we should remove the duplicate information that is not contained in the maps */
1789   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1790   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1791 
1792   /* build local table of row and column ownerships */
1793   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1794   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1795   b->cowners = b->rowners + b->size + 2;
1796   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1797   b->rowners[0] = 0;
1798   for ( i=2; i<=b->size; i++ ) {
1799     b->rowners[i] += b->rowners[i-1];
1800   }
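  /* rowners[i] is now the first global row owned by process i; e.g. local sizes
     3,3,2 (as in the MatCreateMPIAIJ() example above) give rowners = {0,3,6,8} */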
1801   b->rstart = b->rowners[b->rank];
1802   b->rend   = b->rowners[b->rank+1];
1803   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1804   b->cowners[0] = 0;
1805   for ( i=2; i<=b->size; i++ ) {
1806     b->cowners[i] += b->cowners[i-1];
1807   }
1808   b->cstart = b->cowners[b->rank];
1809   b->cend   = b->cowners[b->rank+1];
1810 
1811   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1812   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1813   PLogObjectParent(B,b->A);
1814   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1815   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1816   PLogObjectParent(B,b->B);
1817 
1818   /* build cache for off array entries formed */
1819   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1820   b->donotstash  = 0;
1821   b->colmap      = 0;
1822   b->garray      = 0;
1823   b->roworiented = 1;
1824 
1825   /* stuff used for matrix vector multiply */
1826   b->lvec      = 0;
1827   b->Mvctx     = 0;
1828 
1829   /* stuff for MatGetRow() */
1830   b->rowindices   = 0;
1831   b->rowvalues    = 0;
1832   b->getrowactive = PETSC_FALSE;
1833 
1834   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1835                                      "MatStoreValues_MPIAIJ",
1836                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1837   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1838                                      "MatRetrieveValues_MPIAIJ",
1839                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1840   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1841                                      "MatGetDiagonalBlock_MPIAIJ",
1842                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1843   *A = B;
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNC__
1848 #define __FUNC__ "MatDuplicate_MPIAIJ"
1849 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1850 {
1851   Mat        mat;
1852   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1853   int        ierr, len=0, flg;
1854 
1855   PetscFunctionBegin;
1856   *newmat       = 0;
1857   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1858   PLogObjectCreate(mat);
1859   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1860   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1861   mat->ops->destroy = MatDestroy_MPIAIJ;
1862   mat->ops->view    = MatView_MPIAIJ;
1863   mat->factor       = matin->factor;
1864   mat->assembled    = PETSC_TRUE;
1865   mat->insertmode   = NOT_SET_VALUES;
1866 
1867   a->m = mat->m   = oldmat->m;
1868   a->n = mat->n   = oldmat->n;
1869   a->M = mat->M   = oldmat->M;
1870   a->N = mat->N   = oldmat->N;
1871 
1872   a->rstart       = oldmat->rstart;
1873   a->rend         = oldmat->rend;
1874   a->cstart       = oldmat->cstart;
1875   a->cend         = oldmat->cend;
1876   a->size         = oldmat->size;
1877   a->rank         = oldmat->rank;
1878   a->donotstash   = oldmat->donotstash;
1879   a->roworiented  = oldmat->roworiented;
1880   a->rowindices   = 0;
1881   a->rowvalues    = 0;
1882   a->getrowactive = PETSC_FALSE;
1883 
1884   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1885   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1886   a->cowners = a->rowners + a->size + 2;
1887   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1888   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1889   if (oldmat->colmap) {
1890 #if defined (PETSC_USE_CTABLE)
1891     ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1892 #else
1893     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1894     PLogObjectMemory(mat,(a->N)*sizeof(int));
1895     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1896 #endif
1897   } else a->colmap = 0;
1898   if (oldmat->garray) {
1899     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1900     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1901     PLogObjectMemory(mat,len*sizeof(int));
1902     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1903   } else a->garray = 0;
1904 
1905   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1906   PLogObjectParent(mat,a->lvec);
1907   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1908   PLogObjectParent(mat,a->Mvctx);
1909   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1910   PLogObjectParent(mat,a->A);
1911   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1912   PLogObjectParent(mat,a->B);
1913   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1914   if (flg) {
1915     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1916   }
1917   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1918   *newmat = mat;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #include "sys.h"
1923 
1924 #undef __FUNC__
1925 #define __FUNC__ "MatLoad_MPIAIJ"
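/*
   Loads a matrix stored in the PETSc binary format which, as assumed by the code
   below, consists of a 4 int header (MAT_COOKIE, M, N, and a fourth entry that is
   negative only for special on-disk formats), followed by the M row lengths, then
   all column indices, then all numerical values. Process 0 reads the file and
   ships each other process its contiguous block of rows.
*/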
1926 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1927 {
1928   Mat          A;
1929   Scalar       *vals,*svals;
1930   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1931   MPI_Status   status;
1932   int          i, nz, ierr, j,rstart, rend, fd;
1933   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1934   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1935   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1936 
1937   PetscFunctionBegin;
1938   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1939   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1940   if (!rank) {
1941     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1942     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1943     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1944     if (header[3] < 0) {
1945       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1946     }
1947   }
1948 
1949   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1950   M = header[1]; N = header[2];
1951   /* determine ownership of all rows */
1952   m = M/size + ((M % size) > rank);
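  /* e.g. M=8 rows on size=3 processes gives m = 3,3,2 on ranks 0,1,2 */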
1953   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1954   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1955   rowners[0] = 0;
1956   for ( i=2; i<=size; i++ ) {
1957     rowners[i] += rowners[i-1];
1958   }
1959   rstart = rowners[rank];
1960   rend   = rowners[rank+1];
1961 
1962   /* distribute row lengths to all processors */
1963   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1964   offlens = ourlens + (rend-rstart);
1965   if (!rank) {
1966     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1967     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1968     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1969     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1970     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1971     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1972   } else {
1973     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1974   }
1975 
1976   if (!rank) {
1977     /* calculate the number of nonzeros on each processor */
1978     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1979     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1980     for ( i=0; i<size; i++ ) {
1981       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1982         procsnz[i] += rowlengths[j];
1983       }
1984     }
1985     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1986 
1987     /* determine max buffer needed and allocate it */
1988     maxnz = 0;
1989     for ( i=0; i<size; i++ ) {
1990       maxnz = PetscMax(maxnz,procsnz[i]);
1991     }
1992     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1993 
1994     /* read in my part of the matrix column indices  */
1995     nz     = procsnz[0];
1996     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1997     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1998 
1999     /* read in everyone else's and ship it off */
2000     for ( i=1; i<size; i++ ) {
2001       nz   = procsnz[i];
2002       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2003       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2004     }
2005     ierr = PetscFree(cols);CHKERRQ(ierr);
2006   } else {
2007     /* determine buffer space needed for message */
2008     nz = 0;
2009     for ( i=0; i<m; i++ ) {
2010       nz += ourlens[i];
2011     }
2012     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2013 
2014     /* receive message of column indices*/
2015     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2016     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2017     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2018   }
2019 
2020   /* determine column ownership if matrix is not square */
2021   if (N != M) {
2022     n      = N/size + ((N % size) > rank);
2023     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2024     cstart = cend - n;
2025   } else {
2026     cstart = rstart;
2027     cend   = rend;
2028     n      = cend - cstart;
2029   }
2030 
2031   /* loop over local rows, determining number of off diagonal entries */
2032   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2033   jj = 0;
2034   for ( i=0; i<m; i++ ) {
2035     for ( j=0; j<ourlens[i]; j++ ) {
2036       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2037       jj++;
2038     }
2039   }
2040 
2041   /* create our matrix */
2042   for ( i=0; i<m; i++ ) {
2043     ourlens[i] -= offlens[i];
2044   }
2045   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2046   A = *newmat;
2047   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2048   for ( i=0; i<m; i++ ) {
2049     ourlens[i] += offlens[i];
2050   }
2051 
2052   if (!rank) {
2053     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2054 
2055     /* read in my part of the matrix numerical values  */
2056     nz   = procsnz[0];
2057     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2058 
2059     /* insert into matrix */
2060     jj      = rstart;
2061     smycols = mycols;
2062     svals   = vals;
2063     for ( i=0; i<m; i++ ) {
2064       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2065       smycols += ourlens[i];
2066       svals   += ourlens[i];
2067       jj++;
2068     }
2069 
2070     /* read in other processors and ship out */
2071     for ( i=1; i<size; i++ ) {
2072       nz   = procsnz[i];
2073       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2074       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2075     }
2076     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2077   } else {
2078     /* receive numeric values */
2079     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2080 
2081     /* receive message of values*/
2082     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2083     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2084     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2085 
2086     /* insert into matrix */
2087     jj      = rstart;
2088     smycols = mycols;
2089     svals   = vals;
2090     for ( i=0; i<m; i++ ) {
2091       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2092       smycols += ourlens[i];
2093       svals   += ourlens[i];
2094       jj++;
2095     }
2096   }
2097   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2098   ierr = PetscFree(vals);CHKERRQ(ierr);
2099   ierr = PetscFree(mycols);CHKERRQ(ierr);
2100   ierr = PetscFree(rowners);CHKERRQ(ierr);
2101 
2102   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2103   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 #undef __FUNC__
2108 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2109 /*
2110     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2111   in 'local', and then the end result obtained by concatenating the local matrices.
2112   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2113 */
2114 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2115 {
2116   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2117   Mat        *local,M, Mreuse;
2118   Scalar     *vwork,*aa;
2119   MPI_Comm   comm = mat->comm;
2120   Mat_SeqAIJ *aij;
2121   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2122 
2123   PetscFunctionBegin;
2124   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2125   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2126 
2127   if (call ==  MAT_REUSE_MATRIX) {
2128     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2129     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2130     local = &Mreuse;
2131     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2132   } else {
2133     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2134     Mreuse = *local;
2135     ierr = PetscFree(local);CHKERRQ(ierr);
2136   }
2137 
2138   /*
2139       m - number of local rows
2140       n - number of columns (same on all processors)
2141       rstart - first row in new global matrix generated
2142   */
2143   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2144   if (call == MAT_INITIAL_MATRIX) {
2145     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2146     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2147     ii  = aij->i;
2148     jj  = aij->j;
2149 
2150     /*
2151         Determine the number of non-zeros in the diagonal and off-diagonal
2152         portions of the matrix in order to do correct preallocation
2153     */
2154 
2155     /* first get start and end of "diagonal" columns */
2156     if (csize == PETSC_DECIDE) {
2157       nlocal = n/size + ((n % size) > rank);
2158     } else {
2159       nlocal = csize;
2160     }
2161     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2162     rstart = rend - nlocal;
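    /* e.g. nlocal = 3,3,2 on three processes: the inclusive MPI_Scan() gives
       rend = 3,6,8, so rstart = 0,3,6 is each process's first "diagonal" column */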
2163     if (rank == size - 1 && rend != n) {
2164       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2165     }
2166 
2167     /* next, compute all the lengths */
2168     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2169     olens = dlens + m;
2170     for ( i=0; i<m; i++ ) {
2171       jend = ii[i+1] - ii[i];
2172       olen = 0;
2173       dlen = 0;
2174       for ( j=0; j<jend; j++ ) {
2175         if ( *jj < rstart || *jj >= rend) olen++;
2176         else dlen++;
2177         jj++;
2178       }
2179       olens[i] = olen;
2180       dlens[i] = dlen;
2181     }
2182     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2183     ierr = PetscFree(dlens);CHKERRQ(ierr);
2184   } else {
2185     int ml,nl;
2186 
2187     M = *newmat;
2188     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2189     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2190     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2191     /*
2192          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2193        rather than the slower MatSetValues().
2194     */
2195     M->was_assembled = PETSC_TRUE;
2196     M->assembled     = PETSC_FALSE;
2197   }
2198   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2199   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2200   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2201   ii  = aij->i;
2202   jj  = aij->j;
2203   aa  = aij->a;
2204   for (i=0; i<m; i++) {
2205     row   = rstart + i;
2206     nz    = ii[i+1] - ii[i];
2207     cwork = jj;     jj += nz;
2208     vwork = aa;     aa += nz;
2209     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2210   }
2211 
2212   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2213   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2214   *newmat = M;
2215 
2216   /* save submatrix used in processor for next request */
2217   if (call ==  MAT_INITIAL_MATRIX) {
2218     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2219     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2220   }
2221 
2222   PetscFunctionReturn(0);
2223 }
2224