xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4787f7683dc5ea85e35b4269f7afceca0dae908f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.279 1999/02/11 20:28:21 bsmith Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global matrix length.
13 */
14 #undef __FUNC__
15 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20   int        n = B->n,i;
21 #if defined (USE_CTABLE)
22   int        ierr;
23 #endif
24 
25   PetscFunctionBegin;
26 #if defined (USE_CTABLE)
27   ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr);
28   for ( i=0; i<n; i++ ){
29     ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr);
30   }
31 #else
32   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
33   PLogObjectMemory(mat,aij->N*sizeof(int));
34   PetscMemzero(aij->colmap,aij->N*sizeof(int));
35   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
36 #endif
37   PetscFunctionReturn(0);
38 }
39 
40 extern int DisAssemble_MPIAIJ(Mat);
41 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private - inserts or adds (row,col,value) into the
   "diagonal" sequential block A.  The macro uses many locals declared by the
   caller (rp,ap,rmax,nrow,col1,low,high,t,_i,ii,N plus the cached arrays
   aimax,ai,ailen,aj,aa and a,nonew,shift).  If the row is full the storage
   is enlarged by CHUNKSIZE slots (unless nonew forbids new nonzeros).

   FIX: the binary search previously compared the unshifted col against the
   shifted stored indices and its result was then ignored (the linear scan
   restarted at 0).  It now compares col1 and the scan starts at low.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    /* binary search narrows [low,high); stored indices are shifted, so \
       compare with col1 */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
      for ( _i=low; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one buffer holding a, j and i */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
        PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
 \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row to open slot _i */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}
113 
/*
   MatSetValues_SeqAIJ_B_Private - identical to the A-block macro above but
   operates on the "off-diagonal" sequential block B (locals bimax,bi,bilen,
   bj,ba,b).  Same FIX as the A variant: the binary search now compares the
   shifted column col1 and its result seeds the linear scan, instead of
   comparing the unshifted col and discarding the result.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    /* binary search narrows [low,high); stored indices are shifted, so \
       compare with col1 */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
      for ( _i=low; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one buffer holding a, j and i */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
        PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
 \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row to open slot _i */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}
184 
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds an m-by-n block of values.  Rows owned by this process
   go directly into the local A (diagonal) or B (off-diagonal) sequential
   blocks via the macros above; rows owned elsewhere are stashed for
   communication during assembly (unless donotstash is set).
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  /* NOTE(review): nonew and shift are cached from the A block only but are
     also used by the B-block macro -- presumably both blocks always share
     these settings; confirm */
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;   /* negative rows are silently skipped */
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* off-diagonal block; after assembly global columns must be
             translated to local B columns through colmap */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
	    col = TableFind( aij->colmap, in[j] + 1 ) - 1;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* new nonzero location: must disassemble so B can grow */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
270 
271 #undef __FUNC__
272 #define __FUNC__ "MatGetValues_MPIAIJ"
273 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
276   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
277   int        cstart = aij->cstart, cend = aij->cend,row,col;
278 
279   PetscFunctionBegin;
280   for ( i=0; i<m; i++ ) {
281     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
282     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
283     if (idxm[i] >= rstart && idxm[i] < rend) {
284       row = idxm[i] - rstart;
285       for ( j=0; j<n; j++ ) {
286         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
287         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
288         if (idxn[j] >= cstart && idxn[j] < cend){
289           col = idxn[j] - cstart;
290           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
291         } else {
292           if (!aij->colmap) {
293             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
294           }
295 #if defined (USE_CTABLE)
296 	  col = TableFind( aij->colmap, idxn[j] + 1 ) - 1;
297 #else
298           col = aij->colmap[idxn[j]] - 1;
299 #endif
300           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
301           else {
302             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
303           }
304         }
305       }
306     } else {
307       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
308     }
309   }
310   PetscFunctionReturn(0);
311 }
312 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   Begins assembly: ships every stashed off-process (row,col,value) triple
   to its owning process with nonblocking MPI sends and receives.  The
   matching waits happen in MatAssemblyEnd_MPIAIJ().  Each triple travels
   as three Scalars (indices are encoded as Scalars so one MPI datatype
   serves the whole message).
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (aij->donotstash) {
    /* nothing was stashed; record an empty communication state */
    aij->svalues    = 0; aij->rvalues    = 0;
    aij->nsends     = 0; aij->nrecvs     = 0;
    aij->send_waits = 0; aij->recv_waits = 0;
    aij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor:
      nprocs[j] = #stashed values destined for proc j,
      procs[j]  = 1 iff we send proc j a message,
      owner[i]  = destination proc of stash entry i */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples (row, col, value); starts[] is consumed (incremented) */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[], since packing advanced it past each segment */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);
  PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* save the in-flight communication state for MatAssemblyEnd_MPIAIJ() */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
419 extern int MatSetUpMultiply_MPIAIJ(Mat);
420 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the receives posted by MatAssemblyBegin,
   inserting each received (row,col,value) triple into the local A or B
   block, waits on the sends, then assembles the two sequential blocks.
   Also synchronizes disassembly state across processes.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;

  /*  wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;   /* each triple was shipped as 3 Scalars */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        /* column in the diagonal block */
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        /* off-diagonal block: if already assembled, translate the global
           column to a local B column through colmap */
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          col = TableFind( aij->colmap, col + 1 ) - 1;
#else
          col = aij->colmap[col] - 1;
#endif
          /* new nonzero location: disassemble so B can change structure,
             then use the original global column again */
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  if (aij->recv_waits) PetscFree(aij->recv_waits);
  if (aij->rvalues)    PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (aij->send_waits) PetscFree(aij->send_waits);
  if (aij->svalues)    PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the column map, garray and scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* drop any cached row buffer from MatGetRow */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
505 
506 #undef __FUNC__
507 #define __FUNC__ "MatZeroEntries_MPIAIJ"
508 int MatZeroEntries_MPIAIJ(Mat A)
509 {
510   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
511   int        ierr;
512 
513   PetscFunctionBegin;
514   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
515   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   Zeros the (globally indexed, possibly off-process) rows listed in is.
   Each process routes the row indices to their owners with nonblocking
   MPI messages, collects the rows it owns into lrows, and zeros them in
   its local A and B blocks.  If diag is non-null the diagonal entries of
   the zeroed rows are set to *diag.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  /* NOTE(review): return value ignored here, unlike every other PETSc call
     in this file -- presumably should be ierr = ...;CHKERRQ(ierr); confirm */
  ISRestoreIndices(is,&rows);

  /* rebuild starts[], since packing advanced it past each segment */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;   /* convert to local row index */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else if (diag) {
    /* non-square diagonal block: set diagonal entries one at a time */
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
662 
663 #undef __FUNC__
664 #define __FUNC__ "MatMult_MPIAIJ"
665 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
666 {
667   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
668   int        ierr,nt;
669 
670   PetscFunctionBegin;
671   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
672   if (nt != a->n) {
673     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
674   }
675   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
676   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
677   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
678   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
679   PetscFunctionReturn(0);
680 }
681 
682 #undef __FUNC__
683 #define __FUNC__ "MatMultAdd_MPIAIJ"
684 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
685 {
686   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
687   int        ierr;
688 
689   PetscFunctionBegin;
690   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
691   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
692   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
693   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNC__
698 #define __FUNC__ "MatMultTrans_MPIAIJ"
699 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
700 {
701   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
702   int        ierr;
703 
704   PetscFunctionBegin;
705   /* do nondiagonal part */
706   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
707   /* send it on its way */
708   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
709   /* do local part */
710   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
711   /* receive remote parts: note this assumes the values are not actually */
712   /* inserted in yy until the next line, which is true for my implementation*/
713   /* but is not perhaps always true. */
714   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNC__
719 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
720 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
721 {
722   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
723   int        ierr;
724 
725   PetscFunctionBegin;
726   /* do nondiagonal part */
727   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
728   /* send it on its way */
729   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
730   /* do local part */
731   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
732   /* receive remote parts: note this assumes the values are not actually */
733   /* inserted in yy until the next line, which is true for my implementation*/
734   /* but is not perhaps always true. */
735   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 /*
740   This only works correctly for square matrices where the subblock A->A is the
741    diagonal block
742 */
743 #undef __FUNC__
744 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
745 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
746 {
747   int        ierr;
748   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
749 
750   PetscFunctionBegin;
751   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
752   if (a->rstart != a->cstart || a->rend != a->cend) {
753     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
754   }
755   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNC__
760 #define __FUNC__ "MatScale_MPIAIJ"
761 int MatScale_MPIAIJ(Scalar *aa,Mat A)
762 {
763   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
764   int        ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
768   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
/*
   Destroys the parallel matrix: decrements the reference count and, when
   it reaches zero, frees the mappings, stash, sequential blocks, column
   map, ghost-column array, work vector, scatter context, and the header.
*/
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  /* still referenced elsewhere: nothing to free yet */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  /* colmap is either a hash table or a plain array depending on build */
#if defined (USE_CTABLE)
  if (aij->colmap) TableDelete(aij->colmap);
#else
  if (aij->colmap) PetscFree(aij->colmap);
#endif
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
815 
816 #undef __FUNC__
817 #define __FUNC__ "MatView_MPIAIJ_Binary"
818 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
819 {
820   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
821   int         ierr;
822 
823   PetscFunctionBegin;
824   if (aij->size == 1) {
825     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
826   }
827   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNC__
832 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
833 extern int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
834 {
835   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
836   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
837   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
838   int         size = aij->size;
839   FILE        *fd;
840   ViewerType  vtype;
841 
842   PetscFunctionBegin;
843   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
844   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
845     ierr = ViewerGetFormat(viewer,&format);
846     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
847       MatInfo info;
848       int     flg;
849       MPI_Comm_rank(mat->comm,&rank);
850       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
851       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
852       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
853       PetscSequentialPhaseBegin(mat->comm,1);
854       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
855          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
856       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
857          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
858       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
859       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
860       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
861       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
862       fflush(fd);
863       PetscSequentialPhaseEnd(mat->comm,1);
864       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
865       PetscFunctionReturn(0);
866     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
867       PetscFunctionReturn(0);
868     }
869   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
870     Draw       draw;
871     PetscTruth isnull;
872     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
873     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
874   }
875 
876   if (size == 1) {
877     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
878   } else {
879     /* assemble the entire matrix onto first processor. */
880     Mat         A;
881     Mat_SeqAIJ *Aloc;
882     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
883     Scalar      *a;
884 
885     if (!rank) {
886       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
887     } else {
888       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
889     }
890     PLogObjectParent(mat,A);
891 
892     /* copy over the A part */
893     Aloc = (Mat_SeqAIJ*) aij->A->data;
894     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
895     row = aij->rstart;
896     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
897     for ( i=0; i<m; i++ ) {
898       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
899       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
900     }
901     aj = Aloc->j;
902     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
903 
904     /* copy over the B part */
905     Aloc = (Mat_SeqAIJ*) aij->B->data;
906     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
907     row = aij->rstart;
908     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
909     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
910     for ( i=0; i<m; i++ ) {
911       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
912       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
913     }
914     PetscFree(ct);
915     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
916     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
917     /*
918        Everyone has to call to draw the matrix since the graphics waits are
919        synchronized across all processors that share the Draw object
920     */
921     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
922       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
923     }
924     ierr = MatDestroy(A); CHKERRQ(ierr);
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNC__
930 #define __FUNC__ "MatView_MPIAIJ"
931 int MatView_MPIAIJ(Mat mat,Viewer viewer)
932 {
933   int         ierr;
934   ViewerType  vtype;
935 
936   PetscFunctionBegin;
937   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
938   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
939       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
940     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
941   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
942     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
943   } else {
944     SETERRQ(1,1,"Viewer type not supported by PETSc object");
945   }
946   PetscFunctionReturn(0);
947 }
948 
949 /*
950     This has to provide several versions.
951 
952      2) a) use only local smoothing updating outer values only once.
953         b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
955 */
956 #undef __FUNC__
957 #define __FUNC__ "MatRelax_MPIAIJ"
958 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
959                            double fshift,int its,Vec xx)
960 {
961   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
962   Mat        AA = mat->A, BB = mat->B;
963   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
964   Scalar     *b,*x,*xs,*ls,d,*v,sum;
965   int        ierr,*idx, *diag;
966   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
967 
968   PetscFunctionBegin;
969   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
970   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
971   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
972   xs = x + shift; /* shift by one for index start of 1 */
973   ls = ls + shift;
974   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
975   diag = A->diag;
976   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
977     if (flag & SOR_ZERO_INITIAL_GUESS) {
978       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
979       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
980       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
981       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     }
984     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
985     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
986     while (its--) {
987       /* go down through the rows */
988       for ( i=0; i<m; i++ ) {
989         n    = A->i[i+1] - A->i[i];
990 	PLogFlops(4*n+3);
991         idx  = A->j + A->i[i] + shift;
992         v    = A->a + A->i[i] + shift;
993         sum  = b[i];
994         SPARSEDENSEMDOT(sum,xs,v,idx,n);
995         d    = fshift + A->a[diag[i]+shift];
996         n    = B->i[i+1] - B->i[i];
997         idx  = B->j + B->i[i] + shift;
998         v    = B->a + B->i[i] + shift;
999         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1000         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1001       }
1002       /* come up through the rows */
1003       for ( i=m-1; i>-1; i-- ) {
1004         n    = A->i[i+1] - A->i[i];
1005 	PLogFlops(4*n+3)
1006         idx  = A->j + A->i[i] + shift;
1007         v    = A->a + A->i[i] + shift;
1008         sum  = b[i];
1009         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1010         d    = fshift + A->a[diag[i]+shift];
1011         n    = B->i[i+1] - B->i[i];
1012         idx  = B->j + B->i[i] + shift;
1013         v    = B->a + B->i[i] + shift;
1014         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1015         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1016       }
1017     }
1018   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1019     if (flag & SOR_ZERO_INITIAL_GUESS) {
1020       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1021       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1022       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1023       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1024       PetscFunctionReturn(0);
1025     }
1026     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1027     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1028     while (its--) {
1029       for ( i=0; i<m; i++ ) {
1030         n    = A->i[i+1] - A->i[i];
1031 	PLogFlops(4*n+3);
1032         idx  = A->j + A->i[i] + shift;
1033         v    = A->a + A->i[i] + shift;
1034         sum  = b[i];
1035         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1036         d    = fshift + A->a[diag[i]+shift];
1037         n    = B->i[i+1] - B->i[i];
1038         idx  = B->j + B->i[i] + shift;
1039         v    = B->a + B->i[i] + shift;
1040         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1041         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1042       }
1043     }
1044   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1045     if (flag & SOR_ZERO_INITIAL_GUESS) {
1046       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1047       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1048       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1049       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1050       PetscFunctionReturn(0);
1051     }
1052     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054     while (its--) {
1055       for ( i=m-1; i>-1; i-- ) {
1056         n    = A->i[i+1] - A->i[i];
1057 	PLogFlops(4*n+3);
1058         idx  = A->j + A->i[i] + shift;
1059         v    = A->a + A->i[i] + shift;
1060         sum  = b[i];
1061         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1062         d    = fshift + A->a[diag[i]+shift];
1063         n    = B->i[i+1] - B->i[i];
1064         idx  = B->j + B->i[i] + shift;
1065         v    = B->a + B->i[i] + shift;
1066         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1067         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1068       }
1069     }
1070   } else {
1071     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1072   }
1073   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1074   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1075   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNC__
1080 #define __FUNC__ "MatGetInfo_MPIAIJ"
1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1082 {
1083   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1084   Mat        A = mat->A, B = mat->B;
1085   int        ierr;
1086   double     isend[5], irecv[5];
1087 
1088   PetscFunctionBegin;
1089   info->block_size     = 1.0;
1090   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1091   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1092   isend[3] = info->memory;  isend[4] = info->mallocs;
1093   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1094   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1095   isend[3] += info->memory;  isend[4] += info->mallocs;
1096   if (flag == MAT_LOCAL) {
1097     info->nz_used      = isend[0];
1098     info->nz_allocated = isend[1];
1099     info->nz_unneeded  = isend[2];
1100     info->memory       = isend[3];
1101     info->mallocs      = isend[4];
1102   } else if (flag == MAT_GLOBAL_MAX) {
1103     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1104     info->nz_used      = irecv[0];
1105     info->nz_allocated = irecv[1];
1106     info->nz_unneeded  = irecv[2];
1107     info->memory       = irecv[3];
1108     info->mallocs      = irecv[4];
1109   } else if (flag == MAT_GLOBAL_SUM) {
1110     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1111     info->nz_used      = irecv[0];
1112     info->nz_allocated = irecv[1];
1113     info->nz_unneeded  = irecv[2];
1114     info->memory       = irecv[3];
1115     info->mallocs      = irecv[4];
1116   }
1117   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1118   info->fill_ratio_needed = 0;
1119   info->factor_mallocs    = 0;
1120   info->rows_global       = (double)mat->M;
1121   info->columns_global    = (double)mat->N;
1122   info->rows_local        = (double)mat->m;
1123   info->columns_local     = (double)mat->N;
1124 
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNC__
1129 #define __FUNC__ "MatSetOption_MPIAIJ"
1130 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1131 {
1132   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1133   int        ierr;
1134 
1135   PetscFunctionBegin;
1136   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1137       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1138       op == MAT_COLUMNS_UNSORTED ||
1139       op == MAT_COLUMNS_SORTED ||
1140       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1141       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1142         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1143         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1144   } else if (op == MAT_ROW_ORIENTED) {
1145     a->roworiented = 1;
1146     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1147     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1148   } else if (op == MAT_ROWS_SORTED ||
1149              op == MAT_ROWS_UNSORTED ||
1150              op == MAT_SYMMETRIC ||
1151              op == MAT_STRUCTURALLY_SYMMETRIC ||
1152              op == MAT_YES_NEW_DIAGONALS)
1153     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1154   else if (op == MAT_COLUMN_ORIENTED) {
1155     a->roworiented = 0;
1156     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1157     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1158   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1159     a->donotstash = 1;
1160   } else if (op == MAT_NO_NEW_DIAGONALS){
1161     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1162   } else {
1163     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1164   }
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNC__
1169 #define __FUNC__ "MatGetSize_MPIAIJ"
1170 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1171 {
1172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1173 
1174   PetscFunctionBegin;
1175   if (m) *m = mat->M;
1176   if (n) *n = mat->N;
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNC__
1181 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1182 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1183 {
1184   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1185 
1186   PetscFunctionBegin;
1187   if (m) *m = mat->m;
1188   if (n) *n = mat->n;
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNC__
1193 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1194 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1195 {
1196   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1197 
1198   PetscFunctionBegin;
1199   *m = mat->rstart; *n = mat->rend;
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1204 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1205 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the matrix, with the
   entries of the diagonal block A and off-diagonal block B merged into a
   single array sorted by global column number.

   Only one row may be active at a time (guarded by getrowactive); the
   caller must pair this with MatRestoreRow.  idx and/or v may be null when
   the caller only wants counts.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and (after them) the indices */
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  /* null out the work pointers the caller did not ask for; note the column
     indices of B are still needed for merging whenever values are wanted */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* garray translates compressed B columns to global column numbers */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        /* imark = number of leading B entries with global column < cstart;
           layout is: B entries below cstart, all A entries, remaining B */
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local; shift them back to global numbering */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1285 
1286 #undef __FUNC__
1287 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1288 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1289 {
1290   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1291 
1292   PetscFunctionBegin;
1293   if (aij->getrowactive == PETSC_FALSE) {
1294     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1295   }
1296   aij->getrowactive = PETSC_FALSE;
1297   PetscFunctionReturn(0);
1298 }
1299 
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum), or
   infinity- (max row sum) norm of a parallel AIJ matrix.

   On one processor this delegates to the sequential MatNorm.  Otherwise
   each processor accumulates contributions from its two local blocks and
   the result is combined with an MPI reduction.  NORM_2 is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then reduce and take sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column; non-scalable work array of
         length N on every processor */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        /* A columns are local: offset by cstart (shift adjusts 1-based indexing) */
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        /* B columns are compressed: map through garray to global numbering */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* each row is fully local, so only a MAX reduction is needed */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1374 
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix by
   inserting each local entry (row,col) as (col,row) into a freshly created
   matrix B with swapped dimensions, then assembling.

   If matout is PETSC_NULL the transpose is stored back "in place": the old
   data structures of A are freed and the header of B is copied over A
   (square matrices only).
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* note the swapped local and global dimensions */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift A's column indices to global numbering ... */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert the row of A as a column of B: row and column args swapped */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* ... and restore them afterwards */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  /* garray maps B's compressed column indices to global column numbers */
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    /* B's contents now live inside A; destroy only B's header */
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}
1452 
1453 #undef __FUNC__
1454 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1455 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1456 {
1457   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1458   Mat a = aij->A, b = aij->B;
1459   int ierr,s1,s2,s3;
1460 
1461   PetscFunctionBegin;
1462   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1463   if (rr) {
1464     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1465     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1466     /* Overlap communication with computation. */
1467     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1468   }
1469   if (ll) {
1470     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1471     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1472     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1473   }
1474   /* scale  the diagonal block */
1475   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1476 
1477   if (rr) {
1478     /* Do a scatter end and then right scale the off-diagonal block */
1479     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1480     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1481   }
1482 
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 
1487 extern int MatPrintHelp_SeqAIJ(Mat);
1488 #undef __FUNC__
1489 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1490 int MatPrintHelp_MPIAIJ(Mat A)
1491 {
1492   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1493   int        ierr;
1494 
1495   PetscFunctionBegin;
1496   if (!a->rank) {
1497     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNC__
1503 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1504 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1505 {
1506   PetscFunctionBegin;
1507   *bs = 1;
1508   PetscFunctionReturn(0);
1509 }
1510 #undef __FUNC__
1511 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1512 int MatSetUnfactored_MPIAIJ(Mat A)
1513 {
1514   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1515   int        ierr;
1516 
1517   PetscFunctionBegin;
1518   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNC__
1523 #define __FUNC__ "MatEqual_MPIAIJ"
1524 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1525 {
1526   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1527   Mat        a, b, c, d;
1528   PetscTruth flg;
1529   int        ierr;
1530 
1531   PetscFunctionBegin;
1532   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1533   a = matA->A; b = matA->B;
1534   c = matB->A; d = matB->B;
1535 
1536   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1537   if (flg == PETSC_TRUE) {
1538     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1539   }
1540   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNC__
1545 #define __FUNC__ "MatCopy_MPIAIJ"
1546 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1547 {
1548   int        ierr;
1549   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1550   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1551 
1552   PetscFunctionBegin;
1553   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1554     /* because of the column compression in the off-processor part of the matrix a->B,
1555        the number of columns in a->B and b->B may be different, hence we cannot call
1556        the MatCopy() directly on the two parts. If need be, we can provide a more
1557        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1558        then copying the submatrices */
1559     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1560   } else {
1561     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1562     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1568 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1569 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1570 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1571 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1572 
1573 /* -------------------------------------------------------------------*/
/*
   MatOps_Values - Positional function table for the MPIAIJ matrix type.

   The initializers correspond, in order, to the slots of struct _MatOps
   (declared elsewhere); a 0 entry marks an operation not supported by this
   format.  NOTE(review): never reorder or insert entries here without
   consulting the struct _MatOps declaration -- the correspondence is purely
   positional.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};
1640 
1641 /* ----------------------------------------------------------------------------------------*/
1642 
1643 EXTERN_C_BEGIN
1644 #undef __FUNC__
1645 #define __FUNC__ "MatStoreValues_MPIAIJ"
1646 int MatStoreValues_MPIAIJ(Mat mat)
1647 {
1648   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1649   int        ierr;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1653   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 EXTERN_C_END
1657 
1658 EXTERN_C_BEGIN
1659 #undef __FUNC__
1660 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1661 int MatRetrieveValues_MPIAIJ(Mat mat)
1662 {
1663   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1664   int        ierr;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1668   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 EXTERN_C_END
1672 
1673 #include "pc.h"
1674 EXTERN_C_BEGIN
1675 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1676 EXTERN_C_END
1677 
#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqAIJ format on one processor)


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  /* on a single processor, unless -mat_mpiaij or -mat_mpi is given, build
     a (cheaper) sequential AIJ matrix instead of the parallel structure */
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy    = MatDestroy_MPIAIJ;
  B->ops->view       = MatView_MPIAIJ;
  B->factor          = 0;
  B->assembled       = PETSC_FALSE;
  B->mapping         = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  MPI_Comm_rank(comm,&b->rank);

  /* per-row preallocation arrays are indexed by local row, so local row
     count may not be left for PETSc to decide when they are supplied */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  /* fill in whichever of the local/global sizes the caller left undecided */
  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* rowners and cowners share one allocation; each holds size+1 cumulative
     offsets (rowners[p]..rowners[p+1] are the rows owned by process p) */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* prefix-sum the gathered local sizes into cumulative ownership ranges */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* create the local (sequential) pieces: A is the m x n diagonal block,
     B is the m x N off-diagonal block (compressed via garray later) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register the store/retrieve-values and diagonal-block hooks so they can
     be found by the generic MatStoreValues()/MatRetrieveValues()/... */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}
1941 
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new parallel AIJ matrix with the same
   parallel layout as matin; cpvalues controls whether the numerical values
   of the two sequential blocks are also copied (passed through to
   MatDuplicate() on the A and B blocks).
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy    = MatDestroy_MPIAIJ;
  mat->ops->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  /* the duplicate is created fully assembled, mirroring the source */
  mat->assembled  = PETSC_TRUE;

  /* copy the size/ownership metadata from the source matrix */
  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners and cowners share one allocation (see MatCreateMPIAIJ) */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  /* copy the global-to-local column map only if the source has built one */
  if (oldmat->colmap) {
#if defined (USE_CTABLE)
    ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr);
#else
    /* NOTE(review): CreateColmap_MPIAIJ_Private allocates N+1 entries for
       colmap but only N are allocated/copied here -- verify N suffices */
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
#endif
  } else a->colmap = 0;
  /* garray maps local columns of the off-diagonal block to global columns;
     its length is the column count of the source's B block */
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  /* duplicate the matrix-vector-multiply work vector and scatter context */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  /* duplicate the two sequential blocks, honoring cpvalues */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2013 
2014 #include "sys.h"
2015 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format into a
   parallel MPIAIJ matrix.  Process 0 reads the header, row lengths, column
   indices and numerical values from the binary viewer and ships each
   other process its share; every process then assembles its own rows.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only process 0 touches the file; everyone else receives its data below */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  /* broadcast global sizes (header[1..3]) read by process 0 */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix-sum the local row counts into cumulative ownership offsets */
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* ourlens and offlens share one allocation of 2*(rend-rstart) ints */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    /* MPI_Scan gives the cumulative column count up to and including me */
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* split each row length into diagonal/off-diagonal parts for exact
     preallocation, then restore the combined lengths for MatSetValues */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    /* the values messages use the matrix's tag (A->tag), distinct from the
       viewer tag used above for the column indices */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2194 
#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    MatGetSubMatrix_MPIAIJ - Extracts the parallel submatrix given by the
  row index set isrow and column index set iscol; csize is the number of
  local columns of the result (or PETSC_DECIDE), and call selects initial
  creation vs reuse of a previously returned matrix.

    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call ==  MAT_REUSE_MATRIX) {
    /* on reuse, recover the cached sequential submatrix that was attached
       to the output matrix on the initial call (see bottom of function) */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* MPI_Scan gives the cumulative column count up to and including me */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    /* dlens and olens share one allocation of 2*m+1 ints */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* walk the CSR arrays of the sequential submatrix and insert each row
     into the parallel result at its shifted global row number */
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  /* the dereference leaves M as the sole owner, so Mreuse is destroyed
     automatically when M is destroyed */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2312