xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 83e2fdc756c13f964f3ed5aaff42e3626c5b72bc)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.210 1997/07/16 22:00:13 balay Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way, since the
13 length of colmap equals the global number of columns.
14 */
15 #undef __FUNC__
16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
17 int CreateColmap_MPIAIJ_Private(Mat mat)
18 {
19   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
20   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
21   int        n = B->n,i;
22 
23   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
24   PLogObjectMemory(mat,aij->N*sizeof(int));
25   PetscMemzero(aij->colmap,aij->N*sizeof(int));
26   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
27   return 0;
28 }
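
/*
   Lookup sketch (illustrative only, not part of the library; hypothetical
   helper name): because colmap stores local_index+1 for every global
   column present in the off-diagonal block and 0 everywhere else, a
   lookup that yields -1 after subtracting 1 means "no local slot in B
   for this global column".
*/
#if 0
static int ExampleColmapLookup_Hypothetical(Mat_MPIAIJ *aij,int gcol)
{
  int lcol = aij->colmap[gcol] - 1;  /* -1 if gcol is absent from B */
  return lcol;
}
#endif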
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
32 #undef __FUNC__
33 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
35                            PetscTruth *done)
36 {
37   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
38   int        ierr;
39   if (aij->size == 1) {
40     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
41   } else SETERRQ(1,0,"not supported in parallel");
42   return 0;
43 }
44 
45 #undef __FUNC__
46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
48                                PetscTruth *done)
49 {
50   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
51   int        ierr;
52   if (aij->size == 1) {
53     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54   } else SETERRQ(1,0,"not supported in parallel");
55   return 0;
56 }
57 
58 #define CHUNKSIZE   15
59 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
60 { \
61  \
62     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
63     rmax = aimax[row]; nrow = ailen[row];  \
64     col1 = col - shift; \
65      \
66     low = 0; high = nrow; \
67     while (high-low > 5) { \
68       t = (low+high)/2; \
69       if (rp[t] > col1) high = t; \
70       else             low  = t; \
71     } \
72       for ( _i=low; _i<nrow; _i++ ) { \
73         if (rp[_i] > col1) break; \
74         if (rp[_i] == col1) { \
75           if (addv == ADD_VALUES) ap[_i] += value;   \
76           else                  ap[_i] = value; \
77           goto a_noinsert; \
78         } \
79       }  \
80       if (nonew == 1) goto a_noinsert; \
81       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
82       if (nrow >= rmax) { \
83         /* there is no extra room in row, therefore enlarge */ \
84         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
85         Scalar *new_a; \
86  \
87         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
88  \
89         /* malloc new storage space */ \
90         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
91         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
92         new_j   = (int *) (new_a + new_nz); \
93         new_i   = new_j + new_nz; \
94  \
95         /* copy over old data into new slots */ \
96         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
97         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
98         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
99         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
100         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
101                                                            len*sizeof(int)); \
102         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
103         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
104                                                            len*sizeof(Scalar));  \
105         /* free up old matrix storage */ \
106  \
107         PetscFree(a->a);  \
108         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
109         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
110         a->singlemalloc = 1; \
111  \
112         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
113         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
114         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
115         a->maxnz += CHUNKSIZE; \
116         a->reallocs++; \
117       } \
118       N = nrow++ - 1; a->nz++; \
119       /* shift up all the later entries in this row */ \
120       for ( ii=N; ii>=_i; ii-- ) { \
121         rp[ii+1] = rp[ii]; \
122         ap[ii+1] = ap[ii]; \
123       } \
124       rp[_i] = col1;  \
125       ap[_i] = value;  \
126       a_noinsert: ; \
127       ailen[row] = nrow; \
128 }
129 
130 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
131 { \
132  \
133     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
134     rmax = bimax[row]; nrow = bilen[row];  \
135     col1 = col - shift; \
136      \
137     low = 0; high = nrow; \
138     while (high-low > 5) { \
139       t = (low+high)/2; \
140       if (rp[t] > col1) high = t; \
141       else             low  = t; \
142     } \
143       for ( _i=low; _i<nrow; _i++ ) { \
144         if (rp[_i] > col1) break; \
145         if (rp[_i] == col1) { \
146           if (addv == ADD_VALUES) ap[_i] += value;   \
147           else                  ap[_i] = value; \
148           goto b_noinsert; \
149         } \
150       }  \
151       if (nonew == 1) goto b_noinsert; \
152       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
153       if (nrow >= rmax) { \
154         /* there is no extra room in row, therefore enlarge */ \
155         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
156         Scalar *new_a; \
157  \
158         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
159  \
160         /* malloc new storage space */ \
161         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
162         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
163         new_j   = (int *) (new_a + new_nz); \
164         new_i   = new_j + new_nz; \
165  \
166         /* copy over old data into new slots */ \
167         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
168         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
169         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
170         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
171         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
172                                                            len*sizeof(int)); \
173         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
174         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
175                                                            len*sizeof(Scalar));  \
176         /* free up old matrix storage */ \
177  \
178         PetscFree(b->a);  \
179         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
180         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
181         b->singlemalloc = 1; \
182  \
183         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
184         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
185         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
186         b->maxnz += CHUNKSIZE; \
187         b->reallocs++; \
188       } \
189       N = nrow++ - 1; b->nz++; \
190       /* shift up all the later entries in this row */ \
191       for ( ii=N; ii>=_i; ii-- ) { \
192         rp[ii+1] = rp[ii]; \
193         ap[ii+1] = ap[ii]; \
194       } \
195       rp[_i] = col1;  \
196       ap[_i] = value;  \
197       b_noinsert: ; \
198       bilen[row] = nrow; \
199 }
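
/*
   The two macros above insert (row,col,value) into the sorted row of the
   diagonal (A) or off-diagonal (B) SeqAIJ block, growing the storage by
   CHUNKSIZE slots when the row is full.  Below is a minimal sketch
   (illustrative only; plain arrays instead of the Mat_SeqAIJ internals,
   hypothetical name) of the same insert-into-sorted-row idea, with the
   enlarging of storage left to the caller.
*/
#if 0
static int ExampleSortedRowInsert_Hypothetical(int *cols,Scalar *vals,int *nrow,
                                               int rmax,int col,Scalar value,
                                               InsertMode addv)
{
  int i,k;
  for ( i=0; i<*nrow; i++ ) {
    if (cols[i] > col) break;              /* found insertion point */
    if (cols[i] == col) {                  /* entry already present */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return 0;
    }
  }
  if (*nrow >= rmax) return 1;             /* no room: caller must enlarge */
  for ( k=*nrow; k>i; k-- ) {              /* shift later entries up */
    cols[k] = cols[k-1]; vals[k] = vals[k-1];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;
  return 0;
}
#endif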
200 
201 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
202 #undef __FUNC__
203 #define __FUNC__ "MatSetValues_MPIAIJ"
204 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
205 {
206   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
207   Scalar     value;
208   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
209   int        cstart = aij->cstart, cend = aij->cend,row,col;
210   int        roworiented = aij->roworiented;
211 
212   /* Some Variables required in the macro */
213   Mat        A = aij->A;
214   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
215   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
216   Scalar     *aa = a->a;
217 
218   Mat        B = aij->B;
219   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
220   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
221   Scalar     *ba = b->a;
222 
223   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
224   int        nonew = a->nonew,shift = a->indexshift;
225   Scalar     *ap;
226 
227   for ( i=0; i<m; i++ ) {
228 #if defined(PETSC_BOPT_g)
229     if (im[i] < 0) SETERRQ(1,0,"Negative row");
230     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
231 #endif
232     if (im[i] >= rstart && im[i] < rend) {
233       row = im[i] - rstart;
234       for ( j=0; j<n; j++ ) {
235         if (in[j] >= cstart && in[j] < cend){
236           col = in[j] - cstart;
237           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
238           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
239           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
240         }
241 #if defined(PETSC_BOPT_g)
242         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
243         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
244 #endif
245         else {
246           if (mat->was_assembled) {
247             if (!aij->colmap) {
248               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
249             }
250             col = aij->colmap[in[j]] - 1;
251             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
252               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
253               col =  in[j];
254               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
255               B = aij->B;
256               b = (Mat_SeqAIJ *) B->data;
257               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
258               ba = b->a;
259             }
260           }
261           else col = in[j];
262           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
263           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
264           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
265         }
266       }
267     }
268     else {
269       if (roworiented && !aij->donotstash) {
270         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
271       }
272       else {
273         if (!aij->donotstash) {
274           row = im[i];
275           for ( j=0; j<n; j++ ) {
276             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
277           }
278         }
279       }
280     }
281   }
282   return 0;
283 }
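
/*
   Usage sketch (illustrative; hypothetical name, indices and values made
   up): entries in locally owned rows go straight into the A (diagonal)
   or B (off-diagonal) block, while entries for rows owned by other
   processors are stashed and communicated during assembly.
*/
#if 0
static int ExampleSetValues_Hypothetical(Mat mat)
{
  int    ierr,row,col,rstart,rend;
  Scalar v = 1.0;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
  row  = rstart; col = rstart;        /* a locally owned diagonal entry */
  ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES); CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
#endif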
284 
285 #undef __FUNC__
286 #define __FUNC__ "MatGetValues_MPIAIJ"
287 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
290   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
291   int        cstart = aij->cstart, cend = aij->cend,row,col;
292 
293   for ( i=0; i<m; i++ ) {
294     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
295     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
296     if (idxm[i] >= rstart && idxm[i] < rend) {
297       row = idxm[i] - rstart;
298       for ( j=0; j<n; j++ ) {
299         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
300         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
301         if (idxn[j] >= cstart && idxn[j] < cend){
302           col = idxn[j] - cstart;
303           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
304         }
305         else {
306           if (!aij->colmap) {
307             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
308           }
309           col = aij->colmap[idxn[j]] - 1;
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
313           }
314         }
315       }
316     }
317     else {
318       SETERRQ(1,0,"Only local values currently supported");
319     }
320   }
321   return 0;
322 }
323 
324 #undef __FUNC__
325 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
326 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
327 {
328   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
329   MPI_Comm    comm = mat->comm;
330   int         size = aij->size, *owners = aij->rowners;
331   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
332   MPI_Request *send_waits,*recv_waits;
333   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
334   InsertMode  addv;
335   Scalar      *rvalues,*svalues;
336 
337   /* make sure all processors are either in INSERTMODE or ADDMODE */
338   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
339   if (addv == (ADD_VALUES|INSERT_VALUES)) {
340     SETERRQ(1,0,"Some processors inserted others added");
341   }
342   mat->insertmode = addv; /* in case this processor had no cache */
343 
344   /*  first count number of contributors to each processor */
345   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
346   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
347   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
348   for ( i=0; i<aij->stash.n; i++ ) {
349     idx = aij->stash.idx[i];
350     for ( j=0; j<size; j++ ) {
351       if (idx >= owners[j] && idx < owners[j+1]) {
352         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
353       }
354     }
355   }
356   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
357 
358   /* inform other processors of number of messages and max length*/
359   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
360   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
361   nreceives = work[rank];
362   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
363   nmax = work[rank];
364   PetscFree(work);
365 
366   /* post receives:
367        1) each message will consist of ordered triples (global row
368      index, global column index, value); we store the indices as doubles
369      to simplify the message passing.
370        2) since we don't know how long each individual message is we
371      allocate the largest needed buffer for each receive. Potentially
372      this is a lot of wasted space.
373 
374 
375        This could be done better.
376   */
377   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
378   CHKPTRQ(rvalues);
379   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
380   CHKPTRQ(recv_waits);
381   for ( i=0; i<nreceives; i++ ) {
382     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
383               comm,recv_waits+i);
384   }
385 
386   /* do sends:
387       1) starts[i] gives the starting index in svalues for stuff going to
388          the ith processor
389   */
390   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
391   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
392   CHKPTRQ(send_waits);
393   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
394   starts[0] = 0;
395   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
396   for ( i=0; i<aij->stash.n; i++ ) {
397     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
398     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
399     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
400   }
401   PetscFree(owner);
402   starts[0] = 0;
403   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
404   count = 0;
405   for ( i=0; i<size; i++ ) {
406     if (procs[i]) {
407       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
408                 comm,send_waits+count++);
409     }
410   }
411   PetscFree(starts); PetscFree(nprocs);
412 
413   /* Free cache space */
414   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
415   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
416 
417   aij->svalues    = svalues;    aij->rvalues    = rvalues;
418   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
419   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
420   aij->rmax       = nmax;
421 
422   return 0;
423 }
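
/*
   Wire-format note: each stashed entry travels as three consecutive
   Scalars -- (global row, global column, value) -- so a message carrying
   n entries holds 3*n Scalars.  MatAssemblyEnd_MPIAIJ() below unpacks
   with the matching layout:

     row = (int) PetscReal(values[3*i])   - rstart;
     col = (int) PetscReal(values[3*i+1]);
     val =       values[3*i+2];
*/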
424 extern int MatSetUpMultiply_MPIAIJ(Mat);
425 
426 #undef __FUNC__
427 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
428 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
429 {
430   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
431   MPI_Status  *send_status,recv_status;
432   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
433   int         row,col,other_disassembled;
434   Scalar      *values,val;
435   InsertMode  addv = mat->insertmode;
436 
437   /*  wait on receives */
438   while (count) {
439     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
440     /* unpack receives into our local space */
441     values = aij->rvalues + 3*imdex*aij->rmax;
442     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
443     n = n/3;
444     for ( i=0; i<n; i++ ) {
445       row = (int) PetscReal(values[3*i]) - aij->rstart;
446       col = (int) PetscReal(values[3*i+1]);
447       val = values[3*i+2];
448       if (col >= aij->cstart && col < aij->cend) {
449         col -= aij->cstart;
450         ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
451       }
452       else {
453         if (mat->was_assembled || mat->assembled) {
454           if (!aij->colmap) {
455             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
456           }
457           col = aij->colmap[col] - 1;
458           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
459             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
460             col = (int) PetscReal(values[3*i+1]);
461           }
462         }
463         ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
464       }
465     }
466     count--;
467   }
468   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
469 
470   /* wait on sends */
471   if (aij->nsends) {
472     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
473     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
474     PetscFree(send_status);
475   }
476   PetscFree(aij->send_waits); PetscFree(aij->svalues);
477 
478   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
479   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
480 
481   /* determine if any processor has disassembled; if so, we must
482      also disassemble ourselves so that we may reassemble. */
483   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
484   if (mat->was_assembled && !other_disassembled) {
485     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
486   }
487 
488   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
489     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
490   }
491   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
492   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
493 
494   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
495   return 0;
496 }
497 
498 #undef __FUNC__
499 #define __FUNC__ "MatZeroEntries_MPIAIJ"
500 int MatZeroEntries_MPIAIJ(Mat A)
501 {
502   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
503   int        ierr;
504   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
505   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
506   return 0;
507 }
508 
509 /* the code does not do the diagonal entries correctly unless the
510    matrix is square and the column and row ownerships are identical.
511    This is a BUG. The only way to fix it seems to be to access
512    aij->A and aij->B directly and not through the MatZeroRows()
513    routine.
514 */
515 #undef __FUNC__
516 #define __FUNC__ "MatZeroRows_MPIAIJ"
517 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
518 {
519   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
520   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
521   int            *procs,*nprocs,j,found,idx,nsends,*work;
522   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
523   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
524   int            *lens,imdex,*lrows,*values;
525   MPI_Comm       comm = A->comm;
526   MPI_Request    *send_waits,*recv_waits;
527   MPI_Status     recv_status,*send_status;
528   IS             istmp;
529 
530   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
531   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
532 
533   /*  first count number of contributors to each processor */
534   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
535   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
536   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
537   for ( i=0; i<N; i++ ) {
538     idx = rows[i];
539     found = 0;
540     for ( j=0; j<size; j++ ) {
541       if (idx >= owners[j] && idx < owners[j+1]) {
542         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
543       }
544     }
545     if (!found) SETERRQ(1,0,"Index out of range");
546   }
547   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
548 
549   /* inform other processors of number of messages and max length*/
550   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
551   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
552   nrecvs = work[rank];
553   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
554   nmax = work[rank];
555   PetscFree(work);
556 
557   /* post receives:   */
558   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
559   CHKPTRQ(rvalues);
560   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
561   CHKPTRQ(recv_waits);
562   for ( i=0; i<nrecvs; i++ ) {
563     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
564   }
565 
566   /* do sends:
567       1) starts[i] gives the starting index in svalues for stuff going to
568          the ith processor
569   */
570   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
571   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
572   CHKPTRQ(send_waits);
573   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
574   starts[0] = 0;
575   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
576   for ( i=0; i<N; i++ ) {
577     svalues[starts[owner[i]]++] = rows[i];
578   }
579   ISRestoreIndices(is,&rows);
580 
581   starts[0] = 0;
582   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
583   count = 0;
584   for ( i=0; i<size; i++ ) {
585     if (procs[i]) {
586       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
587     }
588   }
589   PetscFree(starts);
590 
591   base = owners[rank];
592 
593   /*  wait on receives */
594   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
595   source = lens + nrecvs;
596   count  = nrecvs; slen = 0;
597   while (count) {
598     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
599     /* unpack receives into our local space */
600     MPI_Get_count(&recv_status,MPI_INT,&n);
601     source[imdex]  = recv_status.MPI_SOURCE;
602     lens[imdex]  = n;
603     slen += n;
604     count--;
605   }
606   PetscFree(recv_waits);
607 
608   /* move the data into the send scatter */
609   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
610   count = 0;
611   for ( i=0; i<nrecvs; i++ ) {
612     values = rvalues + i*nmax;
613     for ( j=0; j<lens[i]; j++ ) {
614       lrows[count++] = values[j] - base;
615     }
616   }
617   PetscFree(rvalues); PetscFree(lens);
618   PetscFree(owner); PetscFree(nprocs);
619 
620   /* actually zap the local rows */
621   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
622   PLogObjectParent(A,istmp);
623   PetscFree(lrows);
624   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
625   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
626   ierr = ISDestroy(istmp); CHKERRQ(ierr);
627 
628   /* wait on sends */
629   if (nsends) {
630     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
631     CHKPTRQ(send_status);
632     MPI_Waitall(nsends,send_waits,send_status);
633     PetscFree(send_status);
634   }
635   PetscFree(send_waits); PetscFree(svalues);
636 
637   return 0;
638 }
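
/*
   Usage sketch (illustrative; hypothetical name, row numbers made up):
   each processor may list any global rows, and the routine routes every
   index to its owning processor before the local rows are zeroed.
*/
#if 0
static int ExampleZeroRows_Hypothetical(Mat A)
{
  IS     is;
  int    ierr,rows[2];
  Scalar diag = 1.0;
  rows[0] = 0; rows[1] = 5;   /* global rows, possibly owned elsewhere */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag); CHKERRQ(ierr); /* keep a unit diagonal */
  ierr = ISDestroy(is); CHKERRQ(ierr);
  return 0;
}
#endif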
639 
640 #undef __FUNC__
641 #define __FUNC__ "MatMult_MPIAIJ"
642 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
643 {
644   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
645   int        ierr,nt;
646 
647   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
648   if (nt != a->n) {
649     SETERRQ(1,0,"Incompatible partition of A and xx");
650   }
651   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
652   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
653   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
654   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
655   return 0;
656 }
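
/*
   Overlap note: the scatter that gathers the ghost values of xx into
   a->lvec is started first, the diagonal block is applied while those
   messages are in flight, and the off-diagonal block is applied once the
   scatter completes:

     VecScatterBegin(xx,lvec,...)    start the ghost update
     yy  = A*xx                      diagonal block, local data only
     VecScatterEnd(xx,lvec,...)      finish the ghost update
     yy += B*lvec                    off-diagonal block on ghost values
*/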
657 
658 #undef __FUNC__
659 #define __FUNC__ "MatMultAdd_MPIAIJ"
660 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
661 {
662   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
663   int        ierr;
664   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
665   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
666   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
667   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
668   return 0;
669 }
670 
671 #undef __FUNC__
672 #define __FUNC__ "MatMultTrans_MPIAIJ"
673 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
674 {
675   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
676   int        ierr;
677 
678   /* do nondiagonal part */
679   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
680   /* send it on its way */
681   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
682   /* do local part */
683   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
684   /* receive remote parts: note this assumes the values are not actually */
685   /* inserted in yy until the next line, which is true for this */
686   /* implementation but may not always hold. */
687   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
688   return 0;
689 }
690 
691 #undef __FUNC__
692 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
693 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
694 {
695   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
696   int        ierr;
697 
698   /* do nondiagonal part */
699   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
700   /* send it on its way */
701   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
702   /* do local part */
703   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
704   /* receive remote parts: note this assumes the values are not actually */
705   /* inserted in zz until the next line, which is true for this */
706   /* implementation but may not always hold. */
707   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
708   return 0;
709 }
710 
711 /*
712   This only works correctly for square matrices where the subblock A->A is the
713    diagonal block
714 */
715 #undef __FUNC__
716 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
717 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
718 {
719   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
720   if (a->M != a->N)
721     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
722   if (a->rstart != a->cstart || a->rend != a->cend)
723     SETERRQ(1,0,"row partition must equal col partition");
724   return MatGetDiagonal(a->A,v);
725 }
726 
727 #undef __FUNC__
728 #define __FUNC__ "MatScale_MPIAIJ"
729 int MatScale_MPIAIJ(Scalar *aa,Mat A)
730 {
731   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
732   int        ierr;
733   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
734   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
735   return 0;
736 }
737 
738 #undef __FUNC__
739 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
740 int MatDestroy_MPIAIJ(PetscObject obj)
741 {
742   Mat        mat = (Mat) obj;
743   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
744   int        ierr;
745 
746 #if defined(PETSC_LOG)
747   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
748 #endif
749   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
750   PetscFree(aij->rowners);
751   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
752   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
753   if (aij->colmap) PetscFree(aij->colmap);
754   if (aij->garray) PetscFree(aij->garray);
755   if (aij->lvec)   VecDestroy(aij->lvec);
756   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
757   if (aij->rowvalues) PetscFree(aij->rowvalues);
758   PetscFree(aij);
759   if (mat->mapping) {
760     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
761   }
762   PLogObjectDestroy(mat);
763   PetscHeaderDestroy(mat);
764   return 0;
765 }
766 
767 #undef __FUNC__
768 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
769 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
770 {
771   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
772   int         ierr;
773 
774   if (aij->size == 1) {
775     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
776   }
777   else SETERRQ(1,0,"Only uniprocessor output supported");
778   return 0;
779 }
780 
781 #undef __FUNC__
782 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
783 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
784 {
785   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
786   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
787   int         ierr, format,shift = C->indexshift,rank;
788   FILE        *fd;
789   ViewerType  vtype;
790 
791   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
792   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
793     ierr = ViewerGetFormat(viewer,&format);
794     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
795       MatInfo info;
796       int     flg;
797       MPI_Comm_rank(mat->comm,&rank);
798       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
799       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
800       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
801       PetscSequentialPhaseBegin(mat->comm,1);
802       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
803          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
804       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
805          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
806       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
807       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
808       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
809       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
810       fflush(fd);
811       PetscSequentialPhaseEnd(mat->comm,1);
812       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
813       return 0;
814     }
815     else if (format == VIEWER_FORMAT_ASCII_INFO) {
816       return 0;
817     }
818   }
819 
820   if (vtype == DRAW_VIEWER) {
821     Draw       draw;
822     PetscTruth isnull;
823     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
824     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
825   }
826 
827   if (vtype == ASCII_FILE_VIEWER) {
828     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
829     PetscSequentialPhaseBegin(mat->comm,1);
830     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
831            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
832            aij->cend);
833     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
834     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
835     fflush(fd);
836     PetscSequentialPhaseEnd(mat->comm,1);
837   }
838   else {
839     int size = aij->size;
840     rank = aij->rank;
841     if (size == 1) {
842       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
843     }
844     else {
845       /* assemble the entire matrix onto first processor. */
846       Mat         A;
847       Mat_SeqAIJ *Aloc;
848       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
849       Scalar      *a;
850 
851       if (!rank) {
852         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
853                CHKERRQ(ierr);
854       }
855       else {
856         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
857                CHKERRQ(ierr);
858       }
859       PLogObjectParent(mat,A);
860 
861       /* copy over the A part */
862       Aloc = (Mat_SeqAIJ*) aij->A->data;
863       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
864       row = aij->rstart;
865       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
866       for ( i=0; i<m; i++ ) {
867         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
868         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
869       }
870       aj = Aloc->j;
871       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
872 
873       /* copy over the B part */
874       Aloc = (Mat_SeqAIJ*) aij->B->data;
875       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
876       row = aij->rstart;
877       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
878       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
879       for ( i=0; i<m; i++ ) {
880         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
881         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
882       }
883       PetscFree(ct);
884       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
885       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
886       if (!rank) {
887         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
888       }
889       ierr = MatDestroy(A); CHKERRQ(ierr);
890     }
891   }
892   return 0;
893 }
894 
895 #undef __FUNC__
896 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
897 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
898 {
899   Mat         mat = (Mat) obj;
900   int         ierr;
901   ViewerType  vtype;
902 
903   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
904   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
905       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
906     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
907   }
908   else if (vtype == BINARY_FILE_VIEWER) {
909     return MatView_MPIAIJ_Binary(mat,viewer);
910   }
911   return 0;
912 }
913 
914 /*
915     This has to provide several versions.
916 
917      2) a) use only local smoothing, updating outer values only once.
918         b) local smoothing, updating outer values each inner iteration.
919      3) coloring, updating outer values between colors.
920 */
921 #undef __FUNC__
922 #define __FUNC__ "MatRelax_MPIAIJ"
923 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
924                            double fshift,int its,Vec xx)
925 {
926   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
927   Mat        AA = mat->A, BB = mat->B;
928   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
929   Scalar     *b,*x,*xs,*ls,d,*v,sum;
930   int        ierr,*idx, *diag;
931   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
932 
933   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
934   xs = x + shift; /* shift by one for index start of 1 */
935   ls = ls + shift;
936   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
937   diag = A->diag;
938   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
939     if (flag & SOR_ZERO_INITIAL_GUESS) {
940       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
941     }
942     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
943     CHKERRQ(ierr);
944     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
945     CHKERRQ(ierr);
946     while (its--) {
947       /* go down through the rows */
948       for ( i=0; i<m; i++ ) {
949         n    = A->i[i+1] - A->i[i];
950         idx  = A->j + A->i[i] + shift;
951         v    = A->a + A->i[i] + shift;
952         sum  = b[i];
953         SPARSEDENSEMDOT(sum,xs,v,idx,n);
954         d    = fshift + A->a[diag[i]+shift];
955         n    = B->i[i+1] - B->i[i];
956         idx  = B->j + B->i[i] + shift;
957         v    = B->a + B->i[i] + shift;
958         SPARSEDENSEMDOT(sum,ls,v,idx,n);
959         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
960       }
961       /* come up through the rows */
962       for ( i=m-1; i>-1; i-- ) {
963         n    = A->i[i+1] - A->i[i];
964         idx  = A->j + A->i[i] + shift;
965         v    = A->a + A->i[i] + shift;
966         sum  = b[i];
967         SPARSEDENSEMDOT(sum,xs,v,idx,n);
968         d    = fshift + A->a[diag[i]+shift];
969         n    = B->i[i+1] - B->i[i];
970         idx  = B->j + B->i[i] + shift;
971         v    = B->a + B->i[i] + shift;
972         SPARSEDENSEMDOT(sum,ls,v,idx,n);
973         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
974       }
975     }
976   }
977   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
978     if (flag & SOR_ZERO_INITIAL_GUESS) {
979       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
980     }
981     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
982     CHKERRQ(ierr);
983     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
984     CHKERRQ(ierr);
985     while (its--) {
986       for ( i=0; i<m; i++ ) {
987         n    = A->i[i+1] - A->i[i];
988         idx  = A->j + A->i[i] + shift;
989         v    = A->a + A->i[i] + shift;
990         sum  = b[i];
991         SPARSEDENSEMDOT(sum,xs,v,idx,n);
992         d    = fshift + A->a[diag[i]+shift];
993         n    = B->i[i+1] - B->i[i];
994         idx  = B->j + B->i[i] + shift;
995         v    = B->a + B->i[i] + shift;
996         SPARSEDENSEMDOT(sum,ls,v,idx,n);
997         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
998       }
999     }
1000   }
1001   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1002     if (flag & SOR_ZERO_INITIAL_GUESS) {
1003       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
1004     }
1005     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1006                             mat->Mvctx); CHKERRQ(ierr);
1007     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
1008                             mat->Mvctx); CHKERRQ(ierr);
1009     while (its--) {
1010       for ( i=m-1; i>-1; i-- ) {
1011         n    = A->i[i+1] - A->i[i];
1012         idx  = A->j + A->i[i] + shift;
1013         v    = A->a + A->i[i] + shift;
1014         sum  = b[i];
1015         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1016         d    = fshift + A->a[diag[i]+shift];
1017         n    = B->i[i+1] - B->i[i];
1018         idx  = B->j + B->i[i] + shift;
1019         v    = B->a + B->i[i] + shift;
1020         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1021         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1022       }
1023     }
1024   }
1025   else {
1026     SETERRQ(1,0,"Option not supported");
1027   }
1028   return 0;
1029 }
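
/*
   Row update used by all three sweeps above (assuming SPARSEDENSEMDOT
   subtracts the sparse dot product v.x from sum):

     x_i <- (1-omega)*x_i + omega*(b_i - sum_{j!=i} a_ij*x_j)/(a_ii + fshift)

   The off-processor terms of each row sum come from the ghost values in
   mat->lvec, which are refreshed once per call, not once per inner
   iteration.
*/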
1030 
1031 #undef __FUNC__
1032 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
1033 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1034 {
1035   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1036   Mat        A = mat->A, B = mat->B;
1037   int        ierr;
1038   double     isend[5], irecv[5];
1039 
1040   info->rows_global    = (double)mat->M;
1041   info->columns_global = (double)mat->N;
1042   info->rows_local     = (double)mat->m;
1043   info->columns_local  = (double)mat->N;
1044   info->block_size     = 1.0;
1045   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1046   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1047   isend[3] = info->memory;  isend[4] = info->mallocs;
1048   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1049   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1050   isend[3] += info->memory;  isend[4] += info->mallocs;
1051   if (flag == MAT_LOCAL) {
1052     info->nz_used      = isend[0];
1053     info->nz_allocated = isend[1];
1054     info->nz_unneeded  = isend[2];
1055     info->memory       = isend[3];
1056     info->mallocs      = isend[4];
1057   } else if (flag == MAT_GLOBAL_MAX) {
1058     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1059     info->nz_used      = irecv[0];
1060     info->nz_allocated = irecv[1];
1061     info->nz_unneeded  = irecv[2];
1062     info->memory       = irecv[3];
1063     info->mallocs      = irecv[4];
1064   } else if (flag == MAT_GLOBAL_SUM) {
1065     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1066     info->nz_used      = irecv[0];
1067     info->nz_allocated = irecv[1];
1068     info->nz_unneeded  = irecv[2];
1069     info->memory       = irecv[3];
1070     info->mallocs      = irecv[4];
1071   }
1072   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1073   info->fill_ratio_needed = 0;
1074   info->factor_mallocs    = 0;
1075 
1076   return 0;
1077 }
1078 
1079 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1080 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1081 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1082 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1083 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1084 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1085 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1086 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1087 
1088 #undef __FUNC__
1089 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
1090 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1093 
1094   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1095       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1096       op == MAT_COLUMNS_UNSORTED ||
1097       op == MAT_COLUMNS_SORTED ||
1098       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1099       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1100         MatSetOption(a->A,op);
1101         MatSetOption(a->B,op);
1102   } else if (op == MAT_ROW_ORIENTED) {
1103     a->roworiented = 1;
1104     MatSetOption(a->A,op);
1105     MatSetOption(a->B,op);
1106   } else if (op == MAT_ROWS_SORTED ||
1107              op == MAT_ROWS_UNSORTED ||
1108              op == MAT_SYMMETRIC ||
1109              op == MAT_STRUCTURALLY_SYMMETRIC ||
1110              op == MAT_YES_NEW_DIAGONALS)
1111     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1112   else if (op == MAT_COLUMN_ORIENTED) {
1113     a->roworiented = 0;
1114     MatSetOption(a->A,op);
1115     MatSetOption(a->B,op);
1116   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1117     a->donotstash = 1;
1118   } else if (op == MAT_NO_NEW_DIAGONALS)
1119     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1120   else
1121     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1122   return 0;
1123 }
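
/*
   Usage sketch (illustrative; hypothetical name): options are set after
   the matrix is created and before assembly; for example, a code that
   generates no off-processor entries can skip the stash entirely.
*/
#if 0
static int ExampleSetOptions_Hypothetical(Mat A)
{
  int ierr;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES); CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS); CHKERRQ(ierr);
  return 0;
}
#endif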
1124 
1125 #undef __FUNC__
1126 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1127 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1128 {
1129   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1130   *m = mat->M; *n = mat->N;
1131   return 0;
1132 }
1133 
1134 #undef __FUNC__
1135 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1136 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1137 {
1138   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1139   *m = mat->m; *n = mat->N;
1140   return 0;
1141 }
1142 
1143 #undef __FUNC__
1144 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1145 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1146 {
1147   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1148   *m = mat->rstart; *n = mat->rend;
1149   return 0;
1150 }
1151 
1152 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1153 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1154 
1155 #undef __FUNC__
1156 #define __FUNC__ "MatGetRow_MPIAIJ"
1157 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1158 {
1159   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1160   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1161   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1162   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1163   int        *cmap, *idx_p;
1164 
1165   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1166   mat->getrowactive = PETSC_TRUE;
1167 
1168   if (!mat->rowvalues && (idx || v)) {
1169     /*
1170         allocate enough space to hold information from the longest row.
1171     */
1172     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1173     int     max = 1,m = mat->m,tmp;
1174     for ( i=0; i<m; i++ ) {
1175       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1176       if (max < tmp) { max = tmp; }
1177     }
1178     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1179     CHKPTRQ(mat->rowvalues);
1180     mat->rowindices = (int *) (mat->rowvalues + max);
1181   }
1182 
1183   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1184   lrow = row - rstart;
1185 
1186   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1187   if (!v)   {pvA = 0; pvB = 0;}
1188   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1189   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1190   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1191   nztot = nzA + nzB;
1192 
1193   cmap  = mat->garray;
1194   if (v  || idx) {
1195     if (nztot) {
1196       /* Sort by increasing column numbers, assuming A and B already sorted */
1197       int imark = -1;
1198       if (v) {
1199         *v = v_p = mat->rowvalues;
1200         for ( i=0; i<nzB; i++ ) {
1201           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1202           else break;
1203         }
1204         imark = i;
1205         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1206         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1207       }
1208       if (idx) {
1209         *idx = idx_p = mat->rowindices;
1210         if (imark > -1) {
1211           for ( i=0; i<imark; i++ ) {
1212             idx_p[i] = cmap[cworkB[i]];
1213           }
1214         } else {
1215           for ( i=0; i<nzB; i++ ) {
1216             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1217             else break;
1218           }
1219           imark = i;
1220         }
1221         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1222         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1223       }
1224     }
1225     else {
1226       if (idx) *idx = 0;
1227       if (v)   *v   = 0;
1228     }
1229   }
1230   *nz = nztot;
1231   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1232   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1233   return 0;
1234 }
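
/*
   Usage sketch (illustrative; hypothetical name): only locally owned
   rows may be queried, and each MatGetRow() must be matched by
   MatRestoreRow() before the next row is requested, since both share
   the rowvalues work space.
*/
#if 0
static int ExampleRowScan_Hypothetical(Mat mat)
{
  int    ierr,i,nz,*cols,rstart,rend;
  Scalar *vals;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
  for ( i=rstart; i<rend; i++ ) {
    ierr = MatGetRow(mat,i,&nz,&cols,&vals); CHKERRQ(ierr);
    /* ... use the nz (column,value) pairs of global row i ... */
    ierr = MatRestoreRow(mat,i,&nz,&cols,&vals); CHKERRQ(ierr);
  }
  return 0;
}
#endif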
1235 
1236 #undef __FUNC__
1237 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1238 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1239 {
1240   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1241   if (aij->getrowactive == PETSC_FALSE) {
1242     SETERRQ(1,0,"MatGetRow not called");
1243   }
1244   aij->getrowactive = PETSC_FALSE;
1245   return 0;
1246 }
1247 
1248 #undef __FUNC__
1249 #define __FUNC__ "MatNorm_MPIAIJ"
1250 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1251 {
1252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1253   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1254   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1255   double     sum = 0.0;
1256   Scalar     *v;
1257 
1258   if (aij->size == 1) {
1259     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1260   } else {
1261     if (type == NORM_FROBENIUS) {
1262       v = amat->a;
1263       for (i=0; i<amat->nz; i++ ) {
1264 #if defined(PETSC_COMPLEX)
1265         sum += real(conj(*v)*(*v)); v++;
1266 #else
1267         sum += (*v)*(*v); v++;
1268 #endif
1269       }
1270       v = bmat->a;
1271       for (i=0; i<bmat->nz; i++ ) {
1272 #if defined(PETSC_COMPLEX)
1273         sum += real(conj(*v)*(*v)); v++;
1274 #else
1275         sum += (*v)*(*v); v++;
1276 #endif
1277       }
1278       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1279       *norm = sqrt(*norm);
1280     }
1281     else if (type == NORM_1) { /* max column norm */
1282       double *tmp, *tmp2;
1283       int    *jj, *garray = aij->garray;
1284       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1285       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1286       PetscMemzero(tmp,aij->N*sizeof(double));
1287       *norm = 0.0;
1288       v = amat->a; jj = amat->j;
1289       for ( j=0; j<amat->nz; j++ ) {
1290         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1291       }
1292       v = bmat->a; jj = bmat->j;
1293       for ( j=0; j<bmat->nz; j++ ) {
1294         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1295       }
1296       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1297       for ( j=0; j<aij->N; j++ ) {
1298         if (tmp2[j] > *norm) *norm = tmp2[j];
1299       }
1300       PetscFree(tmp); PetscFree(tmp2);
1301     }
1302     else if (type == NORM_INFINITY) { /* max row norm */
1303       double ntemp = 0.0;
1304       for ( j=0; j<amat->m; j++ ) {
1305         v = amat->a + amat->i[j] + shift;
1306         sum = 0.0;
1307         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1308           sum += PetscAbsScalar(*v); v++;
1309         }
1310         v = bmat->a + bmat->i[j] + shift;
1311         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1312           sum += PetscAbsScalar(*v); v++;
1313         }
1314         if (sum > ntemp) ntemp = sum;
1315       }
1316       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1317     }
1318     else {
1319       SETERRQ(1,0,"No support for two norm");
1320     }
1321   }
1322   return 0;
1323 }
1324 
1325 #undef __FUNC__
1326 #define __FUNC__ "MatTranspose_MPIAIJ"
1327 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1328 {
1329   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1330   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1331   int        ierr,shift = Aloc->indexshift;
1332   Mat        B;
1333   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1334   Scalar     *array;
1335 
1336   if (matout == PETSC_NULL && M != N)
1337     SETERRQ(1,0,"Square matrix only for in-place");
1338   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1339          PETSC_NULL,&B); CHKERRQ(ierr);
1340 
1341   /* copy over the A part */
1342   Aloc = (Mat_SeqAIJ*) a->A->data;
1343   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1344   row = a->rstart;
1345   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1346   for ( i=0; i<m; i++ ) {
1347     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1348     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1349   }
1350   aj = Aloc->j;
1351   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1352 
1353   /* copy over the B part */
1354   Aloc = (Mat_SeqAIJ*) a->B->data;
1355   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1356   row = a->rstart;
1357   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1358   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1359   for ( i=0; i<m; i++ ) {
1360     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1361     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1362   }
1363   PetscFree(ct);
1364   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1365   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1366   if (matout != PETSC_NULL) {
1367     *matout = B;
1368   } else {
1369     /* This isn't really an in-place transpose, but we free the data structures of a */
1370     PetscFree(a->rowners);
1371     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1372     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1373     if (a->colmap) PetscFree(a->colmap);
1374     if (a->garray) PetscFree(a->garray);
1375     if (a->lvec) VecDestroy(a->lvec);
1376     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1377     PetscFree(a);
1378     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1379     PetscHeaderDestroy(B);
1380   }
1381   return 0;
1382 }
1383 
1384 #undef __FUNC__
1385 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1386 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1387 {
1388   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1389   Mat a = aij->A, b = aij->B;
1390   int ierr,s1,s2,s3;
1391 
1392   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1393   if (rr) {
1394     s3 = aij->n;
1395     VecGetLocalSize_Fast(rr,s1);
1396     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1397     /* Overlap communication with computation. */
1398     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1399   }
1400   if (ll) {
1401     VecGetLocalSize_Fast(ll,s1);
1402     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1403     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1404   }
1405   /* scale  the diagonal block */
1406   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1407 
1408   if (rr) {
1409     /* Do a scatter end and then right scale the off-diagonal block */
1410     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1411     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1412   }
1413 
1414   return 0;
1415 }
1416 
1417 
1418 extern int MatPrintHelp_SeqAIJ(Mat);
1419 #undef __FUNC__
1420 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1421 int MatPrintHelp_MPIAIJ(Mat A)
1422 {
1423   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1424 
1425   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1426   else return 0;
1427 }
1428 
1429 #undef __FUNC__
1430 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1431 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1432 {
1433   *bs = 1;
1434   return 0;
1435 }
1436 #undef __FUNC__
1437 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1438 int MatSetUnfactored_MPIAIJ(Mat A)
1439 {
1440   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1441   int        ierr;
1442   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1443   return 0;
1444 }
1445 
1446 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1447 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1448 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1449 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1450 /* -------------------------------------------------------------------*/
1451 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1452        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1453        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1454        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1455        0,0,
1456        0,0,
1457        0,0,
1458        MatRelax_MPIAIJ,
1459        MatTranspose_MPIAIJ,
1460        MatGetInfo_MPIAIJ,0,
1461        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1462        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1463        0,
1464        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1465        0,0,0,0,
1466        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1467        0,0,
1468        0,0,MatConvertSameType_MPIAIJ,0,0,
1469        0,0,0,
1470        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1471        MatPrintHelp_MPIAIJ,
1472        MatScale_MPIAIJ,0,0,0,
1473        MatGetBlockSize_MPIAIJ,0,0,0,0,
1474        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1475 
1476 
1477 #undef __FUNC__
1478 #define __FUNC__ "MatCreateMPIAIJ"
1479 /*@C
1480    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1481    (the default parallel PETSc format).  For good matrix assembly performance
1482    the user should preallocate the matrix storage by setting the parameters
1483    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1484    performance can be increased by more than a factor of 50.
1485 
1486    Input Parameters:
1487 .  comm - MPI communicator
1488 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
1489            This value should be the same as the local size used in creating the
1490            y vector for the matrix-vector product y = Ax.
1491 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
1492        This value should be the same as the local size used in creating the
1493        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1494 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1495 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1496 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1497            (same for all local rows)
1498 .  d_nnz - array containing the number of nonzeros in the various rows of the
1499            diagonal portion of the local submatrix (possibly different for each row)
1500            or PETSC_NULL. You must leave room for the diagonal entry even if
1501            it is zero.
1502 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1503            submatrix (same for all local rows).
1504 .  o_nnz - array containing the number of nonzeros in the various rows of the
1505            off-diagonal portion of the local submatrix (possibly different for
1506            each row) or PETSC_NULL.
1507 
1508    Output Parameter:
1509 .  A - the matrix
1510 
1511    Notes:
1512    The AIJ format (also called the Yale sparse matrix format or
1513    compressed row storage) is fully compatible with standard Fortran 77
1514    storage.  That is, the stored row and column indices can begin at
1515    either one (as in Fortran) or zero.  See the users manual for details.
1516 
1517    The user MUST specify either the local or global matrix dimensions
1518    (possibly both).
1519 
1520    By default, this format uses inodes (identical nodes) when possible.
1521    We search for consecutive rows with the same nonzero structure, thereby
1522    reusing matrix information to achieve increased efficiency.
1523 
1524    Options Database Keys:
1525 $    -mat_aij_no_inode  - Do not use inodes
1526 $    -mat_aij_inode_limit <limit> - Set inode limit.
1527 $        (max limit=5)
1528 $    -mat_aij_oneindex - Internally use indexing starting at 1
1529 $        rather than 0.  Note: When calling MatSetValues(),
1530 $        the user still MUST index entries starting at 0!
1531 
1532    Storage Information:
1533    For a square global matrix we define each processor's diagonal portion
1534    to be its local rows and the corresponding columns (a square submatrix);
1535    each processor's off-diagonal portion encompasses the remainder of the
1536    local matrix (a rectangular submatrix).
1537 
1538    The user can specify preallocated storage for the diagonal part of
1539    the local submatrix with either d_nz or d_nnz (not both).  Set
1540    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1541    memory allocation.  Likewise, specify preallocated storage for the
1542    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1543 
1544    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1545    the figure below we depict these three local rows and all columns (0-11).
1546 
1547 $          0 1 2 3 4 5 6 7 8 9 10 11
1548 $         -------------------
1549 $  row 3  |  o o o d d d o o o o o o
1550 $  row 4  |  o o o d d d o o o o o o
1551 $  row 5  |  o o o d d d o o o o o o
1552 $         -------------------
1553 $
1554 
1555    Thus, any entries in the d locations are stored in the d (diagonal)
1556    submatrix, and any entries in the o locations are stored in the
1557    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1558    stored simply in the MATSEQAIJ format for compressed row storage.
1559 
1560    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1561    and o_nz should indicate the number of nonzeros per row in the o matrix.
1562    In general, for PDE problems in which most nonzeros are near the diagonal,
1563    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1564    or you will get TERRIBLE performance; see the users' manual chapter on
1565    matrices.
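
   For example (an illustrative sketch, assuming every location pictured above
   holds a nonzero), the process owning rows 3-5 of the 12 x 12 matrix could
   preallocate with per-row counts
$      int d_nnz[3] = {3,3,3}, o_nnz[3] = {9,9,9};
$      ierr = MatCreateMPIAIJ(comm,3,3,12,12,0,d_nnz,0,o_nnz,&A);
   or, since the counts are the same in every row, with the scalar form
$      ierr = MatCreateMPIAIJ(comm,3,3,12,12,3,PETSC_NULL,9,PETSC_NULL,&A);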
1566 
1567 .keywords: matrix, aij, compressed row, sparse, parallel
1568 
1569 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1570 @*/
1571 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1572                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1573 {
1574   Mat          B;
1575   Mat_MPIAIJ   *b;
1576   int          ierr, i,sum[2],work[2],size;
1577 
1578   *A = 0;
1579   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1580   PLogObjectCreate(B);
1581   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1582   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1583   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1584   MPI_Comm_size(comm,&size);
1585   if (size == 1) {
1586     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1587     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1588     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1589     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1590     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1591     B->ops.solve             = MatSolve_MPIAIJ;
1592     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1593     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1594     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1595     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1596   }
1597   B->destroy    = MatDestroy_MPIAIJ;
1598   B->view       = MatView_MPIAIJ;
1599   B->factor     = 0;
1600   B->assembled  = PETSC_FALSE;
1601   B->mapping    = 0;
1602 
1603   B->insertmode = NOT_SET_VALUES;
1604   MPI_Comm_rank(comm,&b->rank);
1605   MPI_Comm_size(comm,&b->size);
1606 
1607   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1608     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1609 
1610   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1611     work[0] = m; work[1] = n;
1612     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1613     if (M == PETSC_DECIDE) M = sum[0];
1614     if (N == PETSC_DECIDE) N = sum[1];
1615   }
1616   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1617   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1618   b->m = m; B->m = m;
1619   b->n = n; B->n = n;
1620   b->N = N; B->N = N;
1621   b->M = M; B->M = M;
1622 
1623   /* build local table of row and column ownerships */
1624   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1625   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1626   b->cowners = b->rowners + b->size + 2;
1627   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1628   b->rowners[0] = 0;
1629   for ( i=2; i<=b->size; i++ ) {
1630     b->rowners[i] += b->rowners[i-1];
1631   }
1632   b->rstart = b->rowners[b->rank];
1633   b->rend   = b->rowners[b->rank+1];
1634   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1635   b->cowners[0] = 0;
1636   for ( i=2; i<=b->size; i++ ) {
1637     b->cowners[i] += b->cowners[i-1];
1638   }
1639   b->cstart = b->cowners[b->rank];
1640   b->cend   = b->cowners[b->rank+1];
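  /* rowners[i] .. rowners[i+1]-1 now give the global rows owned by process i;
     cowners gives the analogous ranges for the column (x-vector) layout */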
1641 
1642   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1643   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1644   PLogObjectParent(B,b->A);
1645   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1646   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1647   PLogObjectParent(B,b->B);
1648 
1649   /* build cache for off array entries formed */
1650   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1651   b->donotstash  = 0;
1652   b->colmap      = 0;
1653   b->garray      = 0;
1654   b->roworiented = 1;
1655 
1656   /* stuff used for matrix vector multiply */
1657   b->lvec      = 0;
1658   b->Mvctx     = 0;
1659 
1660   /* stuff for MatGetRow() */
1661   b->rowindices   = 0;
1662   b->rowvalues    = 0;
1663   b->getrowactive = PETSC_FALSE;
1664 
1665   *A = B;
1666   return 0;
1667 }
1668 
1669 #undef __FUNC__
1670 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1671 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1672 {
1673   Mat        mat;
1674   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1675   int        ierr, len=0, flg;
1676 
1677   *newmat       = 0;
1678   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1679   PLogObjectCreate(mat);
1680   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1681   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1682   mat->destroy    = MatDestroy_MPIAIJ;
1683   mat->view       = MatView_MPIAIJ;
1684   mat->factor     = matin->factor;
1685   mat->assembled  = PETSC_TRUE;
1686 
1687   a->m = mat->m   = oldmat->m;
1688   a->n = mat->n   = oldmat->n;
1689   a->M = mat->M   = oldmat->M;
1690   a->N = mat->N   = oldmat->N;
1691 
1692   a->rstart       = oldmat->rstart;
1693   a->rend         = oldmat->rend;
1694   a->cstart       = oldmat->cstart;
1695   a->cend         = oldmat->cend;
1696   a->size         = oldmat->size;
1697   a->rank         = oldmat->rank;
1698   mat->insertmode = NOT_SET_VALUES;
1699   a->rowvalues    = 0;
1700   a->getrowactive = PETSC_FALSE;
1701 
1702   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1703   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1704   a->cowners = a->rowners + a->size + 2;
1705   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1706   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1707   if (oldmat->colmap) {
1708     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1709     PLogObjectMemory(mat,(a->N)*sizeof(int));
1710     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1711   } else a->colmap = 0;
1712   if (oldmat->garray) {
1713     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1714     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1715     PLogObjectMemory(mat,len*sizeof(int));
1716     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1717   } else a->garray = 0;
1718 
1719   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1720   PLogObjectParent(mat,a->lvec);
1721   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1722   PLogObjectParent(mat,a->Mvctx);
1723   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1724   PLogObjectParent(mat,a->A);
1725   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1726   PLogObjectParent(mat,a->B);
1727   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1728   if (flg) {
1729     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1730   }
1731   *newmat = mat;
1732   return 0;
1733 }
1734 
1735 #include "sys.h"
1736 
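/*
   A minimal usage sketch for loading such a matrix from a PETSc binary file;
   the viewer creation call below is an assumption from this release's viewer
   interface, not taken from this file:

      Viewer v; Mat A; int ierr;
      ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&v); CHKERRQ(ierr);
      ierr = MatLoad(v,MATMPIAIJ,&A); CHKERRQ(ierr);
      ierr = ViewerDestroy(v); CHKERRQ(ierr);
*/
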
1737 #undef __FUNC__
1738 #define __FUNC__ "MatLoad_MPIAIJ"
1739 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1740 {
1741   Mat          A;
1742   int          i, nz, ierr, j,rstart, rend, fd;
1743   Scalar       *vals,*svals;
1744   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1745   MPI_Status   status;
1746   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1747   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1748   int          tag = ((PetscObject)viewer)->tag;
1749 
1750   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1751   if (!rank) {
1752     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1753     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1754     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
1755   }
1756 
1757   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1758   M = header[1]; N = header[2];
1759   /* determine ownership of all rows */
1760   m = M/size + ((M % size) > rank);
1761   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1762   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1763   rowners[0] = 0;
1764   for ( i=2; i<=size; i++ ) {
1765     rowners[i] += rowners[i-1];
1766   }
1767   rstart = rowners[rank];
1768   rend   = rowners[rank+1];
1769 
1770   /* distribute row lengths to all processors */
1771   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1772   offlens = ourlens + (rend-rstart);
1773   if (!rank) {
1774     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1775     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1776     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1777     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1778     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1779     PetscFree(sndcounts);
1780   }
1781   else {
1782     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1783   }
1784 
1785   if (!rank) {
1786     /* calculate the number of nonzeros on each processor */
1787     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1788     PetscMemzero(procsnz,size*sizeof(int));
1789     for ( i=0; i<size; i++ ) {
1790       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1791         procsnz[i] += rowlengths[j];
1792       }
1793     }
1794     PetscFree(rowlengths);
1795 
1796     /* determine max buffer needed and allocate it */
1797     maxnz = 0;
1798     for ( i=0; i<size; i++ ) {
1799       maxnz = PetscMax(maxnz,procsnz[i]);
1800     }
1801     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1802 
1803     /* read in my part of the matrix column indices  */
1804     nz = procsnz[0];
1805     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1806     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1807 
1808     /* read in everyone else's part and ship it off */
1809     for ( i=1; i<size; i++ ) {
1810       nz = procsnz[i];
1811       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1812       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1813     }
1814     PetscFree(cols);
1815   }
1816   else {
1817     /* determine buffer space needed for message */
1818     nz = 0;
1819     for ( i=0; i<m; i++ ) {
1820       nz += ourlens[i];
1821     }
1822     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1823 
1824     /* receive message of column indices */
1825     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1826     MPI_Get_count(&status,MPI_INT,&maxnz);
1827     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1828   }
1829 
1830   /* loop over local rows, determining number of off diagonal entries */
1831   PetscMemzero(offlens,m*sizeof(int));
1832   jj = 0;
1833   for ( i=0; i<m; i++ ) {
1834     for ( j=0; j<ourlens[i]; j++ ) {
1835       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1836       jj++;
1837     }
1838   }
1839 
1840   /* create our matrix */
1841   for ( i=0; i<m; i++ ) {
1842     ourlens[i] -= offlens[i];
1843   }
1844   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1845   A = *newmat;
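  /* the column indices in the binary file are stored in ascending order within each
     row, so tell the matrix that incoming MatSetValues() calls are column sorted */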
1846   MatSetOption(A,MAT_COLUMNS_SORTED);
1847   for ( i=0; i<m; i++ ) {
1848     ourlens[i] += offlens[i];
1849   }
1850 
1851   if (!rank) {
1852     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1853 
1854     /* read in my part of the matrix numerical values  */
1855     nz = procsnz[0];
1856     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1857 
1858     /* insert into matrix */
1859     jj      = rstart;
1860     smycols = mycols;
1861     svals   = vals;
1862     for ( i=0; i<m; i++ ) {
1863       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1864       smycols += ourlens[i];
1865       svals   += ourlens[i];
1866       jj++;
1867     }
1868 
1869     /* read in other processors and ship out */
1870     for ( i=1; i<size; i++ ) {
1871       nz = procsnz[i];
1872       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1873       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1874     }
1875     PetscFree(procsnz);
1876   }
1877   else {
1878     /* receive numeric values */
1879     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1880 
1881     /* receive message of values */
1882     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1883     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1884     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
1885 
1886     /* insert into matrix */
1887     jj      = rstart;
1888     smycols = mycols;
1889     svals   = vals;
1890     for ( i=0; i<m; i++ ) {
1891       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1892       smycols += ourlens[i];
1893       svals   += ourlens[i];
1894       jj++;
1895     }
1896   }
1897   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1898 
1899   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1900   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1901   return 0;
1902 }
1903