xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e80fee0d83fca2e79359165ac082b818fca7907f)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: mpiaij.c,v 1.245 1998/05/11 22:07:18 bsmith Exp bsmith $";
4 #endif
5 
6 #include "pinclude/pviewer.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 
11 /* local utility routine that creates a mapping from the global column
12 number to the local number in the off-diagonal part of the local
13 storage of the matrix.  This is done in a non-scalable way, since the
14 length of colmap equals the global number of columns.  A usage sketch follows the routine.
15 */
16 #undef __FUNC__
17 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
18 int CreateColmap_MPIAIJ_Private(Mat mat)
19 {
20   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
21   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
22   int        n = B->n,i;
23 
24   PetscFunctionBegin;
25   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
26   PLogObjectMemory(mat,aij->N*sizeof(int));
27   PetscMemzero(aij->colmap,aij->N*sizeof(int));
28   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
29   PetscFunctionReturn(0);
30 }
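/*
   Sketch of how colmap is typically used once built (compare
   MatSetValues_MPIAIJ() and MatAssemblyEnd_MPIAIJ() below); illustrative only:

       lcol = aij->colmap[gcol] - 1;    where gcol is a global column index;
                                        lcol < 0 means gcol is not present
                                        in the off-diagonal block aij->B

   Storing the local index plus one lets a zeroed entry mean "not present",
   at the cost of an array whose length is the global number of columns.
*/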
31 
32 extern int DisAssemble_MPIAIJ(Mat);
33 
34 #define CHUNKSIZE   15
35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
36 { \
37  \
38     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
39     rmax = aimax[row]; nrow = ailen[row];  \
40     col1 = col - shift; \
41      \
42     low = 0; high = nrow; \
43     while (high-low > 5) { \
44       t = (low+high)/2; \
45       if (rp[t] > col) high = t; \
46       else             low  = t; \
47     } \
48       for ( _i=0; _i<nrow; _i++ ) { \
49         if (rp[_i] > col1) break; \
50         if (rp[_i] == col1) { \
51           if (addv == ADD_VALUES) ap[_i] += value;   \
52           else                  ap[_i] = value; \
53           goto a_noinsert; \
54         } \
55       }  \
56       if (nonew == 1) goto a_noinsert; \
57       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
58       if (nrow >= rmax) { \
59         /* there is no extra room in row, therefore enlarge */ \
60         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
61         Scalar *new_a; \
62  \
63         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
64  \
65         /* malloc new storage space */ \
66         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
67         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
68         new_j   = (int *) (new_a + new_nz); \
69         new_i   = new_j + new_nz; \
70  \
71         /* copy over old data into new slots */ \
72         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
73         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
74         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
75         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
76         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
77                                                            len*sizeof(int)); \
78         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
79         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
80                                                            len*sizeof(Scalar));  \
81         /* free up old matrix storage */ \
82  \
83         PetscFree(a->a);  \
84         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = 1; \
87  \
88         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
89         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
90         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for ( ii=N; ii>=_i; ii-- ) { \
97         rp[ii+1] = rp[ii]; \
98         ap[ii+1] = ap[ii]; \
99       } \
100       rp[_i] = col1;  \
101       ap[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108  \
109     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
110     rmax = bimax[row]; nrow = bilen[row];  \
111     col1 = col - shift; \
112      \
113     low = 0; high = nrow; \
114     while (high-low > 5) { \
115       t = (low+high)/2; \
116       if (rp[t] > col) high = t; \
117       else             low  = t; \
118     } \
119        for ( _i=0; _i<nrow; _i++ ) { \
120         if (rp[_i] > col1) break; \
121         if (rp[_i] == col1) { \
122           if (addv == ADD_VALUES) ap[_i] += value;   \
123           else                  ap[_i] = value; \
124           goto b_noinsert; \
125         } \
126       }  \
127       if (nonew == 1) goto b_noinsert; \
128       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
129       if (nrow >= rmax) { \
130         /* there is no extra room in row, therefore enlarge */ \
131         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
132         Scalar *new_a; \
133  \
134         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
135  \
136         /* malloc new storage space */ \
137         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
138         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
139         new_j   = (int *) (new_a + new_nz); \
140         new_i   = new_j + new_nz; \
141  \
142         /* copy over old data into new slots */ \
143         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
144         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
145         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
146         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
147         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
148                                                            len*sizeof(int)); \
149         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
150         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
151                                                            len*sizeof(Scalar));  \
152         /* free up old matrix storage */ \
153  \
154         PetscFree(b->a);  \
155         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
156         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
157         b->singlemalloc = 1; \
158  \
159         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
160         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
161         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
162         b->maxnz += CHUNKSIZE; \
163         b->reallocs++; \
164       } \
165       N = nrow++ - 1; b->nz++; \
166       /* shift up all the later entries in this row */ \
167       for ( ii=N; ii>=_i; ii-- ) { \
168         rp[ii+1] = rp[ii]; \
169         ap[ii+1] = ap[ii]; \
170       } \
171       rp[_i] = col1;  \
172       ap[_i] = value;  \
173       b_noinsert: ; \
174       bilen[row] = nrow; \
175 }
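/*
   Note on the two macros above: they are textually identical except that one
   updates the diagonal block (a, aa, ai, aj, aimax, ailen, A) and the other
   the off-diagonal block (b, ba, bi, bj, bimax, bilen, B).  Both assume the
   caller has declared and initialized the working variables

       int    *rp, ii, nrow, _i, rmax, N, col1, low, high, t, nonew, shift;
       Scalar *ap;

   exactly as done in MatSetValues_MPIAIJ() below.  A short binary search
   narrows the row, a linear scan locates the column or insertion point, and
   the row storage is enlarged by CHUNKSIZE entries when it is full.
*/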
176 
177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
178 #undef __FUNC__
179 #define __FUNC__ "MatSetValues_MPIAIJ"
180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
181 {
182   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
183   Scalar     value;
184   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
185   int        cstart = aij->cstart, cend = aij->cend,row,col;
186   int        roworiented = aij->roworiented;
187 
188   /* Some variables required by the macros above */
189   Mat        A = aij->A;
190   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
191   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
192   Scalar     *aa = a->a;
193 
194   Mat        B = aij->B;
195   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
196   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
197   Scalar     *ba = b->a;
198 
199   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
200   int        nonew = a->nonew,shift = a->indexshift;
201   Scalar     *ap;
202 
203   PetscFunctionBegin;
204   for ( i=0; i<m; i++ ) {
205 #if defined(USE_PETSC_BOPT_g)
206     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
207     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
208 #endif
209     if (im[i] >= rstart && im[i] < rend) {
210       row = im[i] - rstart;
211       for ( j=0; j<n; j++ ) {
212         if (in[j] >= cstart && in[j] < cend){
213           col = in[j] - cstart;
214           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
215           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
216           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
217         }
218 #if defined(USE_PETSC_BOPT_g)
219         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
220         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
221 #endif
222         else {
223           if (mat->was_assembled) {
224             if (!aij->colmap) {
225               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
226             }
227             col = aij->colmap[in[j]] - 1;
228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
229               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
230               col =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
232               B = aij->B;
233               b = (Mat_SeqAIJ *) B->data;
234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
235               ba = b->a;
236             }
237           } else col = in[j];
238           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
240           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
241         }
242       }
243     }
244     else {
245       if (roworiented && !aij->donotstash) {
246         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
247       }
248       else {
249         if (!aij->donotstash) {
250           row = im[i];
251           for ( j=0; j<n; j++ ) {
252             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
253           }
254         }
255       }
256     }
257   }
258   PetscFunctionReturn(0);
259 }
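/*
   Summary of the routing in MatSetValues_MPIAIJ() above (illustrative):
     - rows owned by this process (rstart <= im[i] < rend):
         columns in [cstart,cend) go into the diagonal block aij->A, all other
         columns go into the off-diagonal block aij->B (translated through
         colmap once the matrix has previously been assembled);
     - rows owned by other processes are stashed and communicated during
       MatAssemblyBegin/End, unless MAT_IGNORE_OFF_PROC_ENTRIES was set.
*/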
260 
261 #undef __FUNC__
262 #define __FUNC__ "MatGetValues_MPIAIJ"
263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
264 {
265   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
266   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
267   int        cstart = aij->cstart, cend = aij->cend,row,col;
268 
269   PetscFunctionBegin;
270   for ( i=0; i<m; i++ ) {
271     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
272     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
273     if (idxm[i] >= rstart && idxm[i] < rend) {
274       row = idxm[i] - rstart;
275       for ( j=0; j<n; j++ ) {
276         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
277         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
278         if (idxn[j] >= cstart && idxn[j] < cend){
279           col = idxn[j] - cstart;
280           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
281         }
282         else {
283           if (!aij->colmap) {
284             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
285           }
286           col = aij->colmap[idxn[j]] - 1;
287           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
288           else {
289             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
290           }
291         }
292       }
293     } else {
294       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
295     }
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNC__
301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
303 {
304   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
305   MPI_Comm    comm = mat->comm;
306   int         size = aij->size, *owners = aij->rowners;
307   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
308   MPI_Request *send_waits,*recv_waits;
309   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
310   InsertMode  addv;
311   Scalar      *rvalues,*svalues;
312 
313   PetscFunctionBegin;
314   if (aij->donotstash) {
315     aij->svalues    = 0; aij->rvalues    = 0;
316     aij->nsends     = 0; aij->nrecvs     = 0;
317     aij->send_waits = 0; aij->recv_waits = 0;
318     aij->rmax       = 0;
319     PetscFunctionReturn(0);
320   }
321 
322   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
323   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
324   if (addv == (ADD_VALUES|INSERT_VALUES)) {
325     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
326   }
327   mat->insertmode = addv; /* in case this processor had no cache */
328 
329   /*  first count number of contributors to each processor */
330   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
331   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
332   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
333   for ( i=0; i<aij->stash.n; i++ ) {
334     idx = aij->stash.idx[i];
335     for ( j=0; j<size; j++ ) {
336       if (idx >= owners[j] && idx < owners[j+1]) {
337         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
338       }
339     }
340   }
341   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
342 
343   /* inform other processors of number of messages and max length*/
344   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
345   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
346   nreceives = work[rank];
347   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
348   nmax = work[rank];
349   PetscFree(work);
350 
351   /* post receives:
352        1) each message will consist of triples
353      (global row index, global column index, value); the indices are stored
354      as Scalars (doubles) to simplify the message passing.
355        2) since we don't know how long each individual message is we
356      allocate the largest needed buffer for each receive. Potentially
357      this is a lot of wasted space.
358 
359 
360        This could be done better.
361   */
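  /*
     Layout of the packed buffers used below (one triple per stashed entry k):

         svalues[3*k]   = (Scalar) global row index
         svalues[3*k+1] = (Scalar) global column index
         svalues[3*k+2] =          numerical value

     and correspondingly for rvalues on the receiving side.
  */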
362   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
363   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
364   for ( i=0; i<nreceives; i++ ) {
365     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
366               comm,recv_waits+i);CHKERRQ(ierr);
367   }
368 
369   /* do sends:
370       1) starts[i] gives the starting index in svalues for stuff going to
371          the ith processor
372   */
373   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
374   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
375   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
376   starts[0]  = 0;
377   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
378   for ( i=0; i<aij->stash.n; i++ ) {
379     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
380     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
381     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
382   }
383   PetscFree(owner);
384   starts[0] = 0;
385   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
386   count = 0;
387   for ( i=0; i<size; i++ ) {
388     if (procs[i]) {
389       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
390     }
391   }
392   PetscFree(starts);
393   PetscFree(nprocs);
394 
395   /* Free cache space */
396   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
397   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
398 
399   aij->svalues    = svalues;    aij->rvalues    = rvalues;
400   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
401   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
402   aij->rmax       = nmax;
403 
404   PetscFunctionReturn(0);
405 }
406 extern int MatSetUpMultiply_MPIAIJ(Mat);
407 
408 #undef __FUNC__
409 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
410 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
411 {
412   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
413   MPI_Status  *send_status,recv_status;
414   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
415   int         row,col,other_disassembled;
416   Scalar      *values,val;
417   InsertMode  addv = mat->insertmode;
418 
419   PetscFunctionBegin;
420 
421   /*  wait on receives */
422   while (count) {
423     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
424     /* unpack receives into our local space */
425     values = aij->rvalues + 3*imdex*aij->rmax;
426     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
427     n = n/3;
428     for ( i=0; i<n; i++ ) {
429       row = (int) PetscReal(values[3*i]) - aij->rstart;
430       col = (int) PetscReal(values[3*i+1]);
431       val = values[3*i+2];
432       if (col >= aij->cstart && col < aij->cend) {
433         col -= aij->cstart;
434         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
435       } else {
436         if (mat->was_assembled || mat->assembled) {
437           if (!aij->colmap) {
438             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
439           }
440           col = aij->colmap[col] - 1;
441           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
442             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
443             col = (int) PetscReal(values[3*i+1]);
444           }
445         }
446         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
447       }
448     }
449     count--;
450   }
451   if (aij->recv_waits) PetscFree(aij->recv_waits);
452   if (aij->rvalues)    PetscFree(aij->rvalues);
453 
454   /* wait on sends */
455   if (aij->nsends) {
456     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
457     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
458     PetscFree(send_status);
459   }
460   if (aij->send_waits) PetscFree(aij->send_waits);
461   if (aij->svalues)    PetscFree(aij->svalues);
462 
463   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
464   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
465 
466   /* determine if any processor has disassembled; if so we must also
467      disassemble ourselves, in order that we may reassemble. */
468   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
469   if (mat->was_assembled && !other_disassembled) {
470     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
471   }
472 
473   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
474     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
475   }
476   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
477   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
478 
479   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
480   PetscFunctionReturn(0);
481 }
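/*
   In outline, MatAssemblyEnd_MPIAIJ() above: receives the stashed
   (row,column,value) triples and inserts them locally, assembles the
   diagonal block, checks (via the MPI_PROD reduction of was_assembled)
   whether any process has disassembled and, if so, disassembles as well,
   sets up the local work vector and scatter (MatSetUpMultiply_MPIAIJ) on
   the first final assembly, and finally assembles the off-diagonal block.
*/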
482 
483 #undef __FUNC__
484 #define __FUNC__ "MatZeroEntries_MPIAIJ"
485 int MatZeroEntries_MPIAIJ(Mat A)
486 {
487   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
488   int        ierr;
489 
490   PetscFunctionBegin;
491   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
492   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNC__
497 #define __FUNC__ "MatZeroRows_MPIAIJ"
498 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
499 {
500   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
501   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
502   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
503   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
504   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
505   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
506   MPI_Comm       comm = A->comm;
507   MPI_Request    *send_waits,*recv_waits;
508   MPI_Status     recv_status,*send_status;
509   IS             istmp;
510   PetscTruth     localdiag;
511 
512   PetscFunctionBegin;
513   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
514   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
515 
516   /*  first count number of contributors to each processor */
517   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
518   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
519   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
520   for ( i=0; i<N; i++ ) {
521     idx = rows[i];
522     found = 0;
523     for ( j=0; j<size; j++ ) {
524       if (idx >= owners[j] && idx < owners[j+1]) {
525         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
526       }
527     }
528     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
529   }
530   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
531 
532   /* inform other processors of number of messages and max length*/
533   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
534   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
535   nrecvs = work[rank];
536   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
537   nmax = work[rank];
538   PetscFree(work);
539 
540   /* post receives:   */
541   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
542   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
543   for ( i=0; i<nrecvs; i++ ) {
544     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
545   }
546 
547   /* do sends:
548       1) starts[i] gives the starting index in svalues for stuff going to
549          the ith processor
550   */
551   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
552   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
553   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
554   starts[0] = 0;
555   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
556   for ( i=0; i<N; i++ ) {
557     svalues[starts[owner[i]]++] = rows[i];
558   }
559   ISRestoreIndices(is,&rows);
560 
561   starts[0] = 0;
562   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
563   count = 0;
564   for ( i=0; i<size; i++ ) {
565     if (procs[i]) {
566       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
567     }
568   }
569   PetscFree(starts);
570 
571   base = owners[rank];
572 
573   /*  wait on receives */
574   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
575   source = lens + nrecvs;
576   count  = nrecvs; slen = 0;
577   while (count) {
578     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
579     /* unpack receives into our local space */
580     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
581     source[imdex]  = recv_status.MPI_SOURCE;
582     lens[imdex]  = n;
583     slen += n;
584     count--;
585   }
586   PetscFree(recv_waits);
587 
588   /* move the data into the send scatter */
589   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
590   count = 0;
591   for ( i=0; i<nrecvs; i++ ) {
592     values = rvalues + i*nmax;
593     for ( j=0; j<lens[i]; j++ ) {
594       lrows[count++] = values[j] - base;
595     }
596   }
597   PetscFree(rvalues); PetscFree(lens);
598   PetscFree(owner); PetscFree(nprocs);
599 
600   /* actually zap the local rows */
601   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
602   PLogObjectParent(A,istmp);
603 
604   /*
605         Zero the required rows. If the "diagonal block" of the matrix
606      is square and the user wishes to set the diagonal, we use separate
607      code so that MatSetValues() is not called for each diagonal entry;
608      that would allocate new memory, causing many mallocs and slowing things down.
609 
610        Contributed by: Matthew Knepley
611   */
612   localdiag = PETSC_FALSE;
613   if (diag && (l->A->M == l->A->N)) {
614     localdiag = PETSC_TRUE;
615     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
616   } else {
617     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
618   }
619   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
620   ierr = ISDestroy(istmp); CHKERRQ(ierr);
621 
622   if (diag && (localdiag == PETSC_FALSE)) {
623     for ( i = 0; i < slen; i++ ) {
624       row = lrows[i] + rstart;
625       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
626     }
627     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
628     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
629   }
630 
639   PetscFree(lrows);
640 
641   /* wait on sends */
642   if (nsends) {
643     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
644     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
645     PetscFree(send_status);
646   }
647   PetscFree(send_waits); PetscFree(svalues);
648 
649   PetscFunctionReturn(0);
650 }
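/*
   In outline, MatZeroRows_MPIAIJ() above: each process sends the global row
   indices it wants zeroed to the owning processes, the owners gather them
   into lrows, the local rows of both blocks are zeroed with MatZeroRows(),
   and, when a diagonal value is requested but cannot be handled by the
   diagonal block directly, it is inserted afterwards with MatSetValues().
*/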
651 
652 #undef __FUNC__
653 #define __FUNC__ "MatMult_MPIAIJ"
654 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
655 {
656   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
657   int        ierr,nt;
658 
659   PetscFunctionBegin;
660   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
661   if (nt != a->n) {
662     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
663   }
664   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
665   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
666   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
667   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
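/*
   MatMult_MPIAIJ() above computes  yy = A_d*xx + A_o*lvec,  where A_d = a->A
   is the diagonal block, A_o = a->B the off-diagonal block, and lvec holds
   the off-process entries of xx gathered by the VecScatter; the scatter is
   started before the local product so communication overlaps computation.
*/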
670 
671 #undef __FUNC__
672 #define __FUNC__ "MatMultAdd_MPIAIJ"
673 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
674 {
675   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
676   int        ierr;
677 
678   PetscFunctionBegin;
679   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
680   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
681   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
682   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNC__
687 #define __FUNC__ "MatMultTrans_MPIAIJ"
688 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
689 {
690   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
691   int        ierr;
692 
693   PetscFunctionBegin;
694   /* do nondiagonal part */
695   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
696   /* send it on its way */
697   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
698   /* do local part */
699   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
700   /* receive remote parts: note this assumes the values are not actually */
701   /* inserted in yy until the next line, which is true for my implementation */
702   /* but may not always be true. */
703   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
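/*
   MatMultTrans_MPIAIJ() above forms  yy = A_d'*xx  locally and adds the
   contributions  A_o'*xx  into the owning processes through the reverse
   scatter with ADD_VALUES, overlapping that communication with the local
   transpose product.
*/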
706 
707 #undef __FUNC__
708 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
709 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
710 {
711   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
712   int        ierr;
713 
714   PetscFunctionBegin;
715   /* do nondiagonal part */
716   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
717   /* send it on its way */
718   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
719   /* do local part */
720   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
721   /* receive remote parts: note this assumes the values are not actually */
722   /* inserted in zz until the next line, which is true for my implementation */
723   /* but may not always be true. */
724   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 /*
729   This only works correctly for square matrices where the subblock A->A is the
730    diagonal block
731 */
732 #undef __FUNC__
733 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
734 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
735 {
736   int        ierr;
737   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
738 
739   PetscFunctionBegin;
740   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
741   if (a->rstart != a->cstart || a->rend != a->cend) {
742     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
743   }
744   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNC__
749 #define __FUNC__ "MatScale_MPIAIJ"
750 int MatScale_MPIAIJ(Scalar *aa,Mat A)
751 {
752   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
753   int        ierr;
754 
755   PetscFunctionBegin;
756   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
757   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
758   PetscFunctionReturn(0);
759 }
760 
761 #undef __FUNC__
762 #define __FUNC__ "MatDestroy_MPIAIJ"
763 int MatDestroy_MPIAIJ(Mat mat)
764 {
765   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
766   int        ierr;
767 
768   PetscFunctionBegin;
769 #if defined(USE_PETSC_LOG)
770   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
771 #endif
772   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
773   PetscFree(aij->rowners);
774   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
775   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
776   if (aij->colmap) PetscFree(aij->colmap);
777   if (aij->garray) PetscFree(aij->garray);
778   if (aij->lvec)   VecDestroy(aij->lvec);
779   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
780   if (aij->rowvalues) PetscFree(aij->rowvalues);
781   PetscFree(aij);
782   PLogObjectDestroy(mat);
783   PetscHeaderDestroy(mat);
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNC__
788 #define __FUNC__ "MatView_MPIAIJ_Binary"
789 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
790 {
791   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
792   int         ierr;
793 
794   PetscFunctionBegin;
795   if (aij->size == 1) {
796     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
797   }
798   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNC__
803 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
804 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
805 {
806   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
807   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
808   int         ierr, format,shift = C->indexshift,rank;
809   FILE        *fd;
810   ViewerType  vtype;
811 
812   PetscFunctionBegin;
813   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
814   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
815     ierr = ViewerGetFormat(viewer,&format);
816     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
817       MatInfo info;
818       int     flg;
819       MPI_Comm_rank(mat->comm,&rank);
820       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
821       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
822       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
823       PetscSequentialPhaseBegin(mat->comm,1);
824       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
825          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
826       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
827          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
828       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
829       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
830       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
831       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
832       fflush(fd);
833       PetscSequentialPhaseEnd(mat->comm,1);
834       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
835       PetscFunctionReturn(0);
836     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
837       PetscFunctionReturn(0);
838     }
839   }
840 
841   if (vtype == DRAW_VIEWER) {
842     Draw       draw;
843     PetscTruth isnull;
844     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
845     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
846   }
847 
848   if (vtype == ASCII_FILE_VIEWER) {
849     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
850     PetscSequentialPhaseBegin(mat->comm,1);
851     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
852            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
853            aij->cend);
854     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
855     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
856     fflush(fd);
857     PetscSequentialPhaseEnd(mat->comm,1);
858   } else {
859     int size = aij->size;
860     rank = aij->rank;
861     if (size == 1) {
862       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
863     } else {
864       /* assemble the entire matrix onto first processor. */
865       Mat         A;
866       Mat_SeqAIJ *Aloc;
867       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
868       Scalar      *a;
869 
870       if (!rank) {
871         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
872       } else {
873         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
874       }
875       PLogObjectParent(mat,A);
876 
877       /* copy over the A part */
878       Aloc = (Mat_SeqAIJ*) aij->A->data;
879       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
880       row = aij->rstart;
881       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
882       for ( i=0; i<m; i++ ) {
883         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
884         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
885       }
886       aj = Aloc->j;
887       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
888 
889       /* copy over the B part */
890       Aloc = (Mat_SeqAIJ*) aij->B->data;
891       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
892       row = aij->rstart;
893       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
894       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
895       for ( i=0; i<m; i++ ) {
896         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
897         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
898       }
899       PetscFree(ct);
900       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
901       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
902       /*
903          Everyone has to call to draw the matrix since the graphics waits are
904          synchronized across all processors that share the Draw object
905       */
906       if (!rank || vtype == DRAW_VIEWER) {
907         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
908       }
909       ierr = MatDestroy(A); CHKERRQ(ierr);
910     }
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNC__
916 #define __FUNC__ "MatView_MPIAIJ"
917 int MatView_MPIAIJ(Mat mat,Viewer viewer)
918 {
919   int         ierr;
920   ViewerType  vtype;
921 
922   PetscFunctionBegin;
923   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
924   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
925       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
926     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
927   } else if (vtype == BINARY_FILE_VIEWER) {
928     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
929   } else {
930     SETERRQ(1,1,"Viewer type not supported by PETSc object");
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 /*
936     This has to provide several versions.
937 
938      2) a) use only local smoothing, updating outer values only once.
939         b) local smoothing, updating outer values each inner iteration
940      3) coloring, updating outer values between colors.
941 */
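/*
   The local sweeps below apply, for each owned row i, the update

       x_i <- (1-omega)*x_i + omega*(b_i - sum_{j != i} a_ij*x_j)/(a_ii + fshift)

   where the sum runs over both the diagonal-block and off-diagonal-block
   entries of the row (the latter using the ghost values in lvec), assuming
   SPARSEDENSEMDOT() subtracts the sparse inner product from its first
   argument.
*/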
942 #undef __FUNC__
943 #define __FUNC__ "MatRelax_MPIAIJ"
944 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
945                            double fshift,int its,Vec xx)
946 {
947   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
948   Mat        AA = mat->A, BB = mat->B;
949   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
950   Scalar     *b,*x,*xs,*ls,d,*v,sum;
951   int        ierr,*idx, *diag;
952   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
953 
954   PetscFunctionBegin;
955   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
956   xs = x + shift; /* shift by one for index start of 1 */
957   ls = ls + shift;
958   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
959   diag = A->diag;
960   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
961     if (flag & SOR_ZERO_INITIAL_GUESS) {
962       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
963       PetscFunctionReturn(0);
964     }
965     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
966     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
967     while (its--) {
968       /* go down through the rows */
969       for ( i=0; i<m; i++ ) {
970         n    = A->i[i+1] - A->i[i];
971         idx  = A->j + A->i[i] + shift;
972         v    = A->a + A->i[i] + shift;
973         sum  = b[i];
974         SPARSEDENSEMDOT(sum,xs,v,idx,n);
975         d    = fshift + A->a[diag[i]+shift];
976         n    = B->i[i+1] - B->i[i];
977         idx  = B->j + B->i[i] + shift;
978         v    = B->a + B->i[i] + shift;
979         SPARSEDENSEMDOT(sum,ls,v,idx,n);
980         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
981       }
982       /* come up through the rows */
983       for ( i=m-1; i>-1; i-- ) {
984         n    = A->i[i+1] - A->i[i];
985         idx  = A->j + A->i[i] + shift;
986         v    = A->a + A->i[i] + shift;
987         sum  = b[i];
988         SPARSEDENSEMDOT(sum,xs,v,idx,n);
989         d    = fshift + A->a[diag[i]+shift];
990         n    = B->i[i+1] - B->i[i];
991         idx  = B->j + B->i[i] + shift;
992         v    = B->a + B->i[i] + shift;
993         SPARSEDENSEMDOT(sum,ls,v,idx,n);
994         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
995       }
996     }
997   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
998     if (flag & SOR_ZERO_INITIAL_GUESS) {
999       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1000       PetscFunctionReturn(0);
1001     }
1002     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1003     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1004     while (its--) {
1005       for ( i=0; i<m; i++ ) {
1006         n    = A->i[i+1] - A->i[i];
1007         idx  = A->j + A->i[i] + shift;
1008         v    = A->a + A->i[i] + shift;
1009         sum  = b[i];
1010         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1011         d    = fshift + A->a[diag[i]+shift];
1012         n    = B->i[i+1] - B->i[i];
1013         idx  = B->j + B->i[i] + shift;
1014         v    = B->a + B->i[i] + shift;
1015         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1016         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1017       }
1018     }
1019   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1020     if (flag & SOR_ZERO_INITIAL_GUESS) {
1021       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1022       PetscFunctionReturn(0);
1023     }
1024     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1025                             mat->Mvctx); CHKERRQ(ierr);
1026     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1027                             mat->Mvctx); CHKERRQ(ierr);
1028     while (its--) {
1029       for ( i=m-1; i>-1; i-- ) {
1030         n    = A->i[i+1] - A->i[i];
1031         idx  = A->j + A->i[i] + shift;
1032         v    = A->a + A->i[i] + shift;
1033         sum  = b[i];
1034         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1035         d    = fshift + A->a[diag[i]+shift];
1036         n    = B->i[i+1] - B->i[i];
1037         idx  = B->j + B->i[i] + shift;
1038         v    = B->a + B->i[i] + shift;
1039         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1040         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1041       }
1042     }
1043   } else {
1044     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1045   }
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNC__
1050 #define __FUNC__ "MatGetInfo_MPIAIJ"
1051 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1052 {
1053   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1054   Mat        A = mat->A, B = mat->B;
1055   int        ierr;
1056   double     isend[5], irecv[5];
1057 
1058   PetscFunctionBegin;
1059   info->block_size     = 1.0;
1060   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1061   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1062   isend[3] = info->memory;  isend[4] = info->mallocs;
1063   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1064   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1065   isend[3] += info->memory;  isend[4] += info->mallocs;
1066   if (flag == MAT_LOCAL) {
1067     info->nz_used      = isend[0];
1068     info->nz_allocated = isend[1];
1069     info->nz_unneeded  = isend[2];
1070     info->memory       = isend[3];
1071     info->mallocs      = isend[4];
1072   } else if (flag == MAT_GLOBAL_MAX) {
1073     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1074     info->nz_used      = irecv[0];
1075     info->nz_allocated = irecv[1];
1076     info->nz_unneeded  = irecv[2];
1077     info->memory       = irecv[3];
1078     info->mallocs      = irecv[4];
1079   } else if (flag == MAT_GLOBAL_SUM) {
1080     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1081     info->nz_used      = irecv[0];
1082     info->nz_allocated = irecv[1];
1083     info->nz_unneeded  = irecv[2];
1084     info->memory       = irecv[3];
1085     info->mallocs      = irecv[4];
1086   }
1087   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1088   info->fill_ratio_needed = 0;
1089   info->factor_mallocs    = 0;
1090   info->rows_global       = (double)mat->M;
1091   info->columns_global    = (double)mat->N;
1092   info->rows_local        = (double)mat->m;
1093   info->columns_local     = (double)mat->n;
1094 
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 #undef __FUNC__
1099 #define __FUNC__ "MatSetOption_MPIAIJ"
1100 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1101 {
1102   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1103 
1104   PetscFunctionBegin;
1105   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1106       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1107       op == MAT_COLUMNS_UNSORTED ||
1108       op == MAT_COLUMNS_SORTED ||
1109       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1110       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1111         MatSetOption(a->A,op);
1112         MatSetOption(a->B,op);
1113   } else if (op == MAT_ROW_ORIENTED) {
1114     a->roworiented = 1;
1115     MatSetOption(a->A,op);
1116     MatSetOption(a->B,op);
1117   } else if (op == MAT_ROWS_SORTED ||
1118              op == MAT_ROWS_UNSORTED ||
1119              op == MAT_SYMMETRIC ||
1120              op == MAT_STRUCTURALLY_SYMMETRIC ||
1121              op == MAT_YES_NEW_DIAGONALS)
1122     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1123   else if (op == MAT_COLUMN_ORIENTED) {
1124     a->roworiented = 0;
1125     MatSetOption(a->A,op);
1126     MatSetOption(a->B,op);
1127   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1128     a->donotstash = 1;
1129   } else if (op == MAT_NO_NEW_DIAGONALS){
1130     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1131   } else {
1132     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNC__
1138 #define __FUNC__ "MatGetSize_MPIAIJ"
1139 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1140 {
1141   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1142 
1143   PetscFunctionBegin;
1144   if (m) *m = mat->M;
1145   if (n) *n = mat->N;
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNC__
1150 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1151 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1152 {
1153   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1154 
1155   PetscFunctionBegin;
1156   if (m) *m = mat->m;
1157   if (n) *n = mat->n;
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNC__
1162 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1163 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1164 {
1165   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1166 
1167   PetscFunctionBegin;
1168   *m = mat->rstart; *n = mat->rend;
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1173 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1174 
1175 #undef __FUNC__
1176 #define __FUNC__ "MatGetRow_MPIAIJ"
1177 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1178 {
1179   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1180   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1181   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1182   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1183   int        *cmap, *idx_p;
1184 
1185   PetscFunctionBegin;
1186   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1187   mat->getrowactive = PETSC_TRUE;
1188 
1189   if (!mat->rowvalues && (idx || v)) {
1190     /*
1191         allocate enough space to hold information from the longest row.
1192     */
1193     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1194     int     max = 1,m = mat->m,tmp;
1195     for ( i=0; i<m; i++ ) {
1196       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1197       if (max < tmp) { max = tmp; }
1198     }
1199     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1200     CHKPTRQ(mat->rowvalues);
1201     mat->rowindices = (int *) (mat->rowvalues + max);
1202   }
1203 
1204   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1205   lrow = row - rstart;
1206 
1207   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1208   if (!v)   {pvA = 0; pvB = 0;}
1209   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1210   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1211   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1212   nztot = nzA + nzB;
1213 
1214   cmap  = mat->garray;
1215   if (v  || idx) {
1216     if (nztot) {
1217       /* Sort by increasing column numbers, assuming A and B already sorted */
1218       int imark = -1;
1219       if (v) {
1220         *v = v_p = mat->rowvalues;
1221         for ( i=0; i<nzB; i++ ) {
1222           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1223           else break;
1224         }
1225         imark = i;
1226         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1227         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1228       }
1229       if (idx) {
1230         *idx = idx_p = mat->rowindices;
1231         if (imark > -1) {
1232           for ( i=0; i<imark; i++ ) {
1233             idx_p[i] = cmap[cworkB[i]];
1234           }
1235         } else {
1236           for ( i=0; i<nzB; i++ ) {
1237             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1238             else break;
1239           }
1240           imark = i;
1241         }
1242         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1243         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1244       }
1245     }
1246     else {
1247       if (idx) *idx = 0;
1248       if (v)   *v   = 0;
1249     }
1250   }
1251   *nz = nztot;
1252   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1253   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
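/*
   The row returned by MatGetRow_MPIAIJ() above is ordered by increasing
   global column number: first the off-diagonal entries whose global columns
   lie below cstart, then the diagonal-block entries (local columns shifted
   by cstart), then the remaining off-diagonal entries, relying on the
   per-block rows already being sorted.
*/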
1256 
1257 #undef __FUNC__
1258 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1259 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1260 {
1261   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1262 
1263   PetscFunctionBegin;
1264   if (aij->getrowactive == PETSC_FALSE) {
1265     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1266   }
1267   aij->getrowactive = PETSC_FALSE;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNC__
1272 #define __FUNC__ "MatNorm_MPIAIJ"
1273 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1274 {
1275   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1276   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1277   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1278   double     sum = 0.0;
1279   Scalar     *v;
1280 
1281   PetscFunctionBegin;
1282   if (aij->size == 1) {
1283     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1284   } else {
1285     if (type == NORM_FROBENIUS) {
1286       v = amat->a;
1287       for (i=0; i<amat->nz; i++ ) {
1288 #if defined(USE_PETSC_COMPLEX)
1289         sum += real(conj(*v)*(*v)); v++;
1290 #else
1291         sum += (*v)*(*v); v++;
1292 #endif
1293       }
1294       v = bmat->a;
1295       for (i=0; i<bmat->nz; i++ ) {
1296 #if defined(USE_PETSC_COMPLEX)
1297         sum += real(conj(*v)*(*v)); v++;
1298 #else
1299         sum += (*v)*(*v); v++;
1300 #endif
1301       }
1302       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1303       *norm = sqrt(*norm);
1304     } else if (type == NORM_1) { /* max column norm */
1305       double *tmp, *tmp2;
1306       int    *jj, *garray = aij->garray;
1307       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1308       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1309       PetscMemzero(tmp,aij->N*sizeof(double));
1310       *norm = 0.0;
1311       v = amat->a; jj = amat->j;
1312       for ( j=0; j<amat->nz; j++ ) {
1313         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1314       }
1315       v = bmat->a; jj = bmat->j;
1316       for ( j=0; j<bmat->nz; j++ ) {
1317         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1318       }
1319       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1320       for ( j=0; j<aij->N; j++ ) {
1321         if (tmp2[j] > *norm) *norm = tmp2[j];
1322       }
1323       PetscFree(tmp); PetscFree(tmp2);
1324     } else if (type == NORM_INFINITY) { /* max row norm */
1325       double ntemp = 0.0;
1326       for ( j=0; j<amat->m; j++ ) {
1327         v = amat->a + amat->i[j] + shift;
1328         sum = 0.0;
1329         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1330           sum += PetscAbsScalar(*v); v++;
1331         }
1332         v = bmat->a + bmat->i[j] + shift;
1333         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1334           sum += PetscAbsScalar(*v); v++;
1335         }
1336         if (sum > ntemp) ntemp = sum;
1337       }
1338       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1339     } else {
1340       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1341     }
1342   }
1343   PetscFunctionReturn(0);
1344 }
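/*
   For reference, the norms computed above are:  NORM_FROBENIUS, the square
   root of the global sum of |a_ij|^2;  NORM_1, the maximum over columns of
   the global column sums of |a_ij| (accumulated into a length-N work array
   and reduced with MPI_SUM);  NORM_INFINITY, the maximum over rows of the
   row sums of |a_ij| (reduced with MPI_MAX).
*/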
1345 
1346 #undef __FUNC__
1347 #define __FUNC__ "MatTranspose_MPIAIJ"
1348 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1349 {
1350   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1351   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1352   int        ierr,shift = Aloc->indexshift;
1353   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1354   Mat        B;
1355   Scalar     *array;
1356 
1357   PetscFunctionBegin;
1358   if (matout == PETSC_NULL && M != N) {
1359     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1360   }
1361 
1362   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1363 
1364   /* copy over the A part */
1365   Aloc = (Mat_SeqAIJ*) a->A->data;
1366   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1367   row = a->rstart;
1368   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1369   for ( i=0; i<m; i++ ) {
1370     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1371     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1372   }
1373   aj = Aloc->j;
1374   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1375 
1376   /* copy over the B part */
1377   Aloc = (Mat_SeqAIJ*) a->B->data;
1378   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1379   row = a->rstart;
1380   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1381   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1382   for ( i=0; i<m; i++ ) {
1383     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1384     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1385   }
1386   PetscFree(ct);
1387   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1388   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1389   if (matout != PETSC_NULL) {
1390     *matout = B;
1391   } else {
1392     PetscOps       *Abops;
1393     struct _MatOps *Aops;
1394 
1395     /* This isn't really an in-place transpose, but we free the data structures from a */
1396     PetscFree(a->rowners);
1397     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1398     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1399     if (a->colmap) PetscFree(a->colmap);
1400     if (a->garray) PetscFree(a->garray);
1401     if (a->lvec) VecDestroy(a->lvec);
1402     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1403     PetscFree(a);
1404 
1405     /*
1406        This is horrible, horrible code. We need to keep the
1407       A pointers for the bops and ops but copy everything
1408       else from B.
1409     */
1410     Abops = A->bops;
1411     Aops  = A->ops;
1412     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1413     A->bops = Abops;
1414     A->ops  = Aops;
1415     PetscHeaderDestroy(B);
1416   }
1417   PetscFunctionReturn(0);
1418 }
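/*
   MatTranspose_MPIAIJ() above builds the transpose by inserting each locally
   owned row as a column of the new matrix (note the swapped row/column
   arguments in the MatSetValues() calls); the "in-place" branch then copies
   B's header into A while preserving A's bops and ops tables.
*/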
1419 
1420 #undef __FUNC__
1421 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1422 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1423 {
1424   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1425   Mat a = aij->A, b = aij->B;
1426   int ierr,s1,s2,s3;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1430   if (rr) {
1431     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1432     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1433     /* Overlap communication with computation. */
1434     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1435   }
1436   if (ll) {
1437     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1438     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1439     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1440   }
1441   /* scale the diagonal block */
1442   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1443 
1444   if (rr) {
1445     /* Do a scatter end and then right scale the off-diagonal block */
1446     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1447     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1448   }
1449 
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 
1454 extern int MatPrintHelp_SeqAIJ(Mat);
1455 #undef __FUNC__
1456 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1457 int MatPrintHelp_MPIAIJ(Mat A)
1458 {
1459   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1460   int        ierr;
1461 
1462   PetscFunctionBegin;
1463   if (!a->rank) {
1464     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNC__
1470 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1471 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1472 {
1473   PetscFunctionBegin;
1474   *bs = 1;
1475   PetscFunctionReturn(0);
1476 }
1477 #undef __FUNC__
1478 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1479 int MatSetUnfactored_MPIAIJ(Mat A)
1480 {
1481   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1482   int        ierr;
1483 
1484   PetscFunctionBegin;
1485   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNC__
1490 #define __FUNC__ "MatEqual_MPIAIJ"
1491 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1492 {
1493   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1494   Mat        a, b, c, d;
1495   PetscTruth flg;
1496   int        ierr;
1497 
1498   PetscFunctionBegin;
1499   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1500   a = matA->A; b = matA->B;
1501   c = matB->A; d = matB->B;
1502 
1503   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1504   if (flg == PETSC_TRUE) {
1505     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1506   }
1507   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1512 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1513 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1514 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1515 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);
1516 
1517 /* -------------------------------------------------------------------*/
1518 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1519        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1520        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1521        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1522        0,0,
1523        0,0,
1524        0,0,
1525        MatRelax_MPIAIJ,
1526        MatTranspose_MPIAIJ,
1527        MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
1528        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1529        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1530        0,
1531        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1532        0,0,0,0,
1533        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1534        0,0,
1535        0,0,MatConvertSameType_MPIAIJ,0,0,
1536        0,0,0,
1537        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1538        MatPrintHelp_MPIAIJ,
1539        MatScale_MPIAIJ,0,0,0,
1540        MatGetBlockSize_MPIAIJ,0,0,0,0,
1541        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
1542        0,0,MatGetSubMatrix_MPIAIJ};
1543 
1544 
1545 #undef __FUNC__
1546 #define __FUNC__ "MatCreateMPIAIJ"
1547 /*@C
1548    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1549    (the default parallel PETSc format).  For good matrix assembly performance
1550    the user should preallocate the matrix storage by setting the parameters
1551    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1552    performance can be increased by more than a factor of 50.
1553 
1554    Collective on MPI_Comm
1555 
1556    Input Parameters:
1557 +  comm - MPI communicator
1558 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1559            This value should be the same as the local size used in creating the
1560            y vector for the matrix-vector product y = Ax.
1561 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1562        This value should be the same as the local size used in creating the
1563        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1564 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1565 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1566 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1567            (same for all local rows)
1568 .  d_nnz - array containing the number of nonzeros in the various rows of the
1569            diagonal portion of the local submatrix (possibly different for each row)
1570            or PETSC_NULL. You must leave room for the diagonal entry even if
1571            it is zero.
1572 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1573            submatrix (same for all local rows).
1574 -  o_nnz - array containing the number of nonzeros in the various rows of the
1575            off-diagonal portion of the local submatrix (possibly different for
1576            each row) or PETSC_NULL.
1577 
1578    Output Parameter:
1579 .  A - the matrix
1580 
1581    Notes:
1582    The AIJ format (also called the Yale sparse matrix format or
1583    compressed row storage) is fully compatible with standard Fortran 77
1584    storage.  That is, the stored row and column indices can begin at
1585    either one (as in Fortran) or zero.  See the users' manual for details.
1586 
1587    The user MUST specify either the local or global matrix dimensions
1588    (possibly both).
1589 
1590    By default, this format uses inodes (identical nodes) when possible.
1591    We search for consecutive rows with the same nonzero structure, thereby
1592    reusing matrix information to achieve increased efficiency.
1593 
1594    Options Database Keys:
1595 +  -mat_aij_no_inode  - Do not use inodes
1596 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1597 -  -mat_aij_oneindex - Internally use indexing starting at 1
1598         rather than 0.  Note that when calling MatSetValues(),
1599         the user still MUST index entries starting at 0!
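   For example, these options are given on the command line when the application
   is launched (the executable name ex1 below is only illustrative):
.vb
     mpirun -np 4 ex1 -mat_aij_no_inode -mat_aij_inode_limit 3
.ve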
1600 
1601    Storage Information:
1602    For a square global matrix we define each processor's diagonal portion
1603    to be its local rows and the corresponding columns (a square submatrix);
1604    each processor's off-diagonal portion encompasses the remainder of the
1605    local matrix (a rectangular submatrix).
1606 
1607    The user can specify preallocated storage for the diagonal part of
1608    the local submatrix with either d_nz or d_nnz (not both).  Set
1609    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1610    memory allocation.  Likewise, specify preallocated storage for the
1611    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1612 
1613    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1614    the figure below we depict these three local rows and all columns (0-11).
1615 
1616 .vb
1617              0 1 2 3 4 5 6 7 8 9 10 11
1618             -------------------
1619      row 3  |  o o o d d d o o o o o o
1620      row 4  |  o o o d d d o o o o o o
1621      row 5  |  o o o d d d o o o o o o
1622             -------------------
1623 .ve
1624 
1625    Thus, any entries in the d locations are stored in the d (diagonal)
1626    submatrix, and any entries in the o locations are stored in the
1627    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1628    stored simply in the MATSEQAIJ format for compressed row storage.
1629 
1630    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1631    and o_nz should indicate the number of nonzeros per row in the o matrix.
1632    In general, for PDE problems in which most nonzeros are near the diagonal,
1633    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1634    or you will get TERRIBLE performance; see the users' manual chapter on
1635    matrices.
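   For example, under the illustrative assumption that four processes each own
   three rows of the 12 by 12 matrix drawn above, and that every location shown
   is in fact nonzero, the preallocation d_nz=3 and o_nz=9 would be appropriate:
.vb
     Mat A;
     int ierr;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,12,12,3,PETSC_NULL,
                            9,PETSC_NULL,&A); CHKERRQ(ierr);
.ve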
1636 
1637 .keywords: matrix, aij, compressed row, sparse, parallel
1638 
1639 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1640 @*/
1641 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1642 {
1643   Mat          B;
1644   Mat_MPIAIJ   *b;
1645   int          ierr, i,sum[2],work[2],size;
1646 
1647   PetscFunctionBegin;
1648   MPI_Comm_size(comm,&size);
1649   if (size == 1) {
1650     if (M == PETSC_DECIDE) M = m;
1651     if (N == PETSC_DECIDE) N = n;
1652     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1653     PetscFunctionReturn(0);
1654   }
1655 
1656   *A = 0;
1657   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
1658   PLogObjectCreate(B);
1659   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1660   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1661   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1662   B->ops->destroy    = MatDestroy_MPIAIJ;
1663   B->ops->view       = MatView_MPIAIJ;
1664   B->factor     = 0;
1665   B->assembled  = PETSC_FALSE;
1666   B->mapping    = 0;
1667 
1668   B->insertmode = NOT_SET_VALUES;
1669   b->size       = size;
1670   MPI_Comm_rank(comm,&b->rank);
1671 
1672   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1673     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1674   }
1675 
1676   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1677     work[0] = m; work[1] = n;
1678     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1679     if (M == PETSC_DECIDE) M = sum[0];
1680     if (N == PETSC_DECIDE) N = sum[1];
1681   }
1682   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1683   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1684   b->m = m; B->m = m;
1685   b->n = n; B->n = n;
1686   b->N = N; B->N = N;
1687   b->M = M; B->M = M;
1688 
1689   /* build local table of row and column ownerships */
1690   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1691   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1692   b->cowners = b->rowners + b->size + 2;
1693   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1694   b->rowners[0] = 0;
1695   for ( i=2; i<=b->size; i++ ) {
1696     b->rowners[i] += b->rowners[i-1];
1697   }
1698   b->rstart = b->rowners[b->rank];
1699   b->rend   = b->rowners[b->rank+1];
1700   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1701   b->cowners[0] = 0;
1702   for ( i=2; i<=b->size; i++ ) {
1703     b->cowners[i] += b->cowners[i-1];
1704   }
1705   b->cstart = b->cowners[b->rank];
1706   b->cend   = b->cowners[b->rank+1];
1707 
1708   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1709   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1710   PLogObjectParent(B,b->A);
1711   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1712   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1713   PLogObjectParent(B,b->B);
1714 
1715   /* build cache (stash) for matrix entries destined for other processors */
1716   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1717   b->donotstash  = 0;
1718   b->colmap      = 0;
1719   b->garray      = 0;
1720   b->roworiented = 1;
1721 
1722   /* stuff used for matrix vector multiply */
1723   b->lvec      = 0;
1724   b->Mvctx     = 0;
1725 
1726   /* stuff for MatGetRow() */
1727   b->rowindices   = 0;
1728   b->rowvalues    = 0;
1729   b->getrowactive = PETSC_FALSE;
1730 
1731   *A = B;
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNC__
1736 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1737 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1738 {
1739   Mat        mat;
1740   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1741   int        ierr, len=0, flg;
1742 
1743   PetscFunctionBegin;
1744   *newmat       = 0;
1745   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
1746   PLogObjectCreate(mat);
1747   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1748   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
1749   mat->ops->destroy    = MatDestroy_MPIAIJ;
1750   mat->ops->view       = MatView_MPIAIJ;
1751   mat->factor     = matin->factor;
1752   mat->assembled  = PETSC_TRUE;
1753 
1754   a->m = mat->m   = oldmat->m;
1755   a->n = mat->n   = oldmat->n;
1756   a->M = mat->M   = oldmat->M;
1757   a->N = mat->N   = oldmat->N;
1758 
1759   a->rstart       = oldmat->rstart;
1760   a->rend         = oldmat->rend;
1761   a->cstart       = oldmat->cstart;
1762   a->cend         = oldmat->cend;
1763   a->size         = oldmat->size;
1764   a->rank         = oldmat->rank;
1765   mat->insertmode = NOT_SET_VALUES;
1766   a->rowvalues    = 0;
1767   a->getrowactive = PETSC_FALSE;
1768 
1769   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1770   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1771   a->cowners = a->rowners + a->size + 2;
1772   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1773   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1774   if (oldmat->colmap) {
1775     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1776     PLogObjectMemory(mat,(a->N)*sizeof(int));
1777     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1778   } else a->colmap = 0;
1779   if (oldmat->garray) {
1780     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1781     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1782     PLogObjectMemory(mat,len*sizeof(int));
1783     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1784   } else a->garray = 0;
1785 
1786   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1787   PLogObjectParent(mat,a->lvec);
1788   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1789   PLogObjectParent(mat,a->Mvctx);
1790   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1791   PLogObjectParent(mat,a->A);
1792   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1793   PLogObjectParent(mat,a->B);
1794   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1795   if (flg) {
1796     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1797   }
1798   *newmat = mat;
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 #include "sys.h"
1803 
1804 #undef __FUNC__
1805 #define __FUNC__ "MatLoad_MPIAIJ"
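/*
    A sketch of the binary file layout as this routine reads it (process 0 does
    all of the reading and ships the pieces belonging to the other processes):

       int    header[4];       cookie (MAT_COOKIE), M = global rows, N = global
                               columns; the fourth entry is not used here except
                               that a negative value marks a special on-disk
                               format that cannot be loaded as MPIAIJ
       int    rowlengths[M];   number of nonzeros in each row
       int    cols[];          column indices of all nonzeros, row by row
       Scalar vals[];          numerical values of all nonzeros, row by row
*/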
1806 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1807 {
1808   Mat          A;
1809   Scalar       *vals,*svals;
1810   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1811   MPI_Status   status;
1812   int          i, nz, ierr, j,rstart, rend, fd;
1813   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1814   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1815   int          tag = ((PetscObject)viewer)->tag;
1816 
1817   PetscFunctionBegin;
1818   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1819   if (!rank) {
1820     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1821     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1822     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1823     if (header[3] < 0) {
1824       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1825     }
1826   }
1827 
1828 
1829   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1830   M = header[1]; N = header[2];
1831   /* determine ownership of all rows */
1832   m = M/size + ((M % size) > rank);
1833   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1834   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1835   rowners[0] = 0;
1836   for ( i=2; i<=size; i++ ) {
1837     rowners[i] += rowners[i-1];
1838   }
1839   rstart = rowners[rank];
1840   rend   = rowners[rank+1];
1841 
1842   /* distribute row lengths to all processors */
1843   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1844   offlens = ourlens + (rend-rstart);
1845   if (!rank) {
1846     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1847     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1848     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1849     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1850     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1851     PetscFree(sndcounts);
1852   } else {
1853     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1854   }
1855 
1856   if (!rank) {
1857     /* calculate the number of nonzeros on each processor */
1858     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1859     PetscMemzero(procsnz,size*sizeof(int));
1860     for ( i=0; i<size; i++ ) {
1861       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1862         procsnz[i] += rowlengths[j];
1863       }
1864     }
1865     PetscFree(rowlengths);
1866 
1867     /* determine max buffer needed and allocate it */
1868     maxnz = 0;
1869     for ( i=0; i<size; i++ ) {
1870       maxnz = PetscMax(maxnz,procsnz[i]);
1871     }
1872     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1873 
1874     /* read in my part of the matrix column indices  */
1875     nz     = procsnz[0];
1876     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1877     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1878 
1879     /* read in everyone else's parts and ship them off */
1880     for ( i=1; i<size; i++ ) {
1881       nz   = procsnz[i];
1882       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1883       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1884     }
1885     PetscFree(cols);
1886   } else {
1887     /* determine buffer space needed for message */
1888     nz = 0;
1889     for ( i=0; i<m; i++ ) {
1890       nz += ourlens[i];
1891     }
1892     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1893 
1894     /* receive message of column indices */
1895     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1896     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1897     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1898   }
1899 
1900   /* loop over local rows, determining the number of off-diagonal entries */
1901   PetscMemzero(offlens,m*sizeof(int));
1902   jj = 0;
1903   for ( i=0; i<m; i++ ) {
1904     for ( j=0; j<ourlens[i]; j++ ) {
1905       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1906       jj++;
1907     }
1908   }
1909 
1910   /* create our matrix */
1911   for ( i=0; i<m; i++ ) {
1912     ourlens[i] -= offlens[i];
1913   }
1914   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1915   A = *newmat;
1916   MatSetOption(A,MAT_COLUMNS_SORTED);
1917   for ( i=0; i<m; i++ ) {
1918     ourlens[i] += offlens[i];
1919   }
1920 
1921   if (!rank) {
1922     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1923 
1924     /* read in my part of the matrix numerical values  */
1925     nz = procsnz[0];
1926     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1927 
1928     /* insert into matrix */
1929     jj      = rstart;
1930     smycols = mycols;
1931     svals   = vals;
1932     for ( i=0; i<m; i++ ) {
1933       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1934       smycols += ourlens[i];
1935       svals   += ourlens[i];
1936       jj++;
1937     }
1938 
1939     /* read in the other processors' parts and ship them out */
1940     for ( i=1; i<size; i++ ) {
1941       nz   = procsnz[i];
1942       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1943       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1944     }
1945     PetscFree(procsnz);
1946   } else {
1947     /* receive numeric values */
1948     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1949 
1950     /* receive message of values */
1951     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1952     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1953     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1954 
1955     /* insert into matrix */
1956     jj      = rstart;
1957     smycols = mycols;
1958     svals   = vals;
1959     for ( i=0; i<m; i++ ) {
1960       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1961       smycols += ourlens[i];
1962       svals   += ourlens[i];
1963       jj++;
1964     }
1965   }
1966   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1967 
1968   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1969   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNC__
1974 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1975 /*
1976     Not great since it makes two copies of the submatrix: first a sequential
1977   AIJ matrix on each process, and then the final result by concatenating the
1978   local matrices. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
1979 */
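/*
    A sketch of the intended calling sequence (the index sets isrow and iscol
    are whatever the caller has built; the names below are illustrative only):

       Mat sub;
       ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
       ... change the numerical values of mat ...
       ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);
*/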
1980 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
1981 {
1982   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1983   Mat        *local,M, Mreuse;
1984   Scalar     *vwork,*aa;
1985   MPI_Comm   comm = mat->comm;
1986   Mat_SeqAIJ *aij;
1987   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
1988 
1989   PetscFunctionBegin;
1990   MPI_Comm_rank(comm,&rank);
1991   MPI_Comm_size(comm,&size);
1992 
1993   if (call ==  MAT_REUSE_MATRIX) {
1994     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1995     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not created with MAT_INITIAL_MATRIX, cannot reuse");
1996     local = &Mreuse;
1997     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1998   } else {
1999     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2000     Mreuse = *local;
2001     PetscFree(local);
2002   }
2003 
2004   /*
2005       m - number of local rows
2006       n - number of columns (same on all processors)
2007       rstart - first row in new global matrix generated
2008   */
2009   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2010   if (call == MAT_INITIAL_MATRIX) {
2011     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2012     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2013     ii  = aij->i;
2014     jj  = aij->j;
2015 
2016     /*
2017         Determine the number of non-zeros in the diagonal and off-diagonal
2018         portions of the matrix in order to do correct preallocation
2019     */
2020 
2021     /* first get start and end of "diagonal" columns */
2022     if (csize == PETSC_DECIDE) {
2023       nlocal = n/size + ((n % size) > rank);
2024     } else {
2025       nlocal = csize;
2026     }
2027     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2028     rstart = rend - nlocal;
2029     if (rank == size - 1 && rend != n) {
2030       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2031     }
2032 
2033     /* next, compute all the lengths */
2034     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2035     olens = dlens + m;
2036     for ( i=0; i<m; i++ ) {
2037       jend = ii[i+1] - ii[i];
2038       olen = 0;
2039       dlen = 0;
2040       for ( j=0; j<jend; j++ ) {
2041         if ( *jj < rstart || *jj >= rend) olen++;
2042         else dlen++;
2043         jj++;
2044       }
2045       olens[i] = olen;
2046       dlens[i] = dlen;
2047     }
2048     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2049     PetscFree(dlens);
2050   } else {
2051     int ml,nl;
2052 
2053     M = *newmat;
2054     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2055     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2056     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2057     /*
2058          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2059        rather than the slower MatSetValues().
2060     */
2061     M->was_assembled = PETSC_TRUE;
2062     M->assembled     = PETSC_FALSE;
2063   }
2064   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2065   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2066   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2067   ii  = aij->i;
2068   jj  = aij->j;
2069   aa  = aij->a;
2070   for (i=0; i<m; i++) {
2071     row   = rstart + i;
2072     nz    = ii[i+1] - ii[i];
2073     cwork = jj;     jj += nz;
2074     vwork = aa;     aa += nz;
2075     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2076   }
2077 
2078   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2079   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2080   *newmat = M;
2081 
2082   /* save the local submatrix on this processor for a later MAT_REUSE_MATRIX request */
2083   if (call ==  MAT_INITIAL_MATRIX) {
2084     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2085     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2086   }
2087 
2088   PetscFunctionReturn(0);
2089 }
2090