xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5ed2d596fd9914de76abbaed56c65f4792ae57ae)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14 
15 /*
16   Local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
19 a slightly higher hash-table lookup cost; without it the map is not scalable
20 (each processor holds an order-N integer array) but is fast to access.
21 */
22 #undef __FUNCT__
23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
24 int CreateColmap_MPIAIJ_Private(Mat mat)
25 {
26   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
27   int        n = aij->B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
32   for (i=0; i<n; i++){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
37   PetscLogObjectMemory(mat,mat->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
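/*
   Illustrative sketch (not compiled): how the colmap built above is consulted.
   The helper name ExampleColmapLookup is hypothetical.  Given a global column
   index gcol owned by another process, the lookup returns in lcol the local
   column index into the off-diagonal block B, or -1 if gcol is not present;
   this mirrors the lookups done in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below.
*/
#if 0
static int ExampleColmapLookup(Mat mat,int gcol,int *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (!aij->colmap) {
    ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
  }
#if defined (PETSC_USE_CTABLE)
  ierr  = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                      /* keys/values are stored shifted by one so 0 means "absent" */
#else
  *lcol = aij->colmap[gcol] - 1;  /* entries are stored shifted by one so 0 means "absent" */
#endif
  PetscFunctionReturn(0);
}
#endif
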
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col1) high = t; \
56       else             low  = t; \
57     } \
58       for (_i=low; _i<high; _i++) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
71         PetscScalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
77         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
78         new_j   = (int*)(new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
83         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for (ii=N; ii>=_i; ii--) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col1) high = t; \
130       else             low  = t; \
131     } \
132        for (_i=low; _i<high; _i++) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
145         PetscScalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
151         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
152         new_j   = (int*)(new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
157         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for (ii=N; ii>=_i; ii--) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
198   PetscScalar  value;
199   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
200   int          cstart = aij->cstart,cend = aij->cend,row,col;
201   PetscTruth   roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat          A = aij->A;
205   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
206   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
207   PetscScalar  *aa = a->a;
208   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
209   Mat          B = aij->B;
210   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
211   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
212   PetscScalar  *ba = b->a;
213 
214   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
215   int          nonew = a->nonew,shift = a->indexshift;
216   PetscScalar  *ap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           if (ignorezeroentries && value == 0.0) continue;
231           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ*)B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           if (ignorezeroentries && value == 0.0) continue;
260           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
261           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
278 
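/*
   Illustrative usage sketch (not compiled); the helper name is hypothetical.
   Entries whose rows lie in [rstart,rend) are inserted directly into the local
   A/B blocks by the macros above, while rows owned by other processes are
   cached in mat->stash and communicated during MatAssemblyBegin/End().
*/
#if 0
static int ExampleAssembleMPIAIJ(MPI_Comm comm,int m)
{
  Mat         A;
  int         ierr,i,rstart,rend;
  PetscScalar one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,m,m,PETSC_DECIDE,PETSC_DECIDE,1,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); /* locally owned diagonal entry */
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
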
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
282 {
283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
284   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
285   int        cstart = aij->cstart,cend = aij->cend,row,col;
286 
287   PetscFunctionBegin;
288   for (i=0; i<m; i++) {
289     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
290     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
291     if (idxm[i] >= rstart && idxm[i] < rend) {
292       row = idxm[i] - rstart;
293       for (j=0; j<n; j++) {
294         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
295         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
296         if (idxn[j] >= cstart && idxn[j] < cend){
297           col = idxn[j] - cstart;
298           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
299         } else {
300           if (!aij->colmap) {
301             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
302           }
303 #if defined (PETSC_USE_CTABLE)
304           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
305           col--;
306 #else
307           col = aij->colmap[idxn[j]] - 1;
308 #endif
309           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
310           else {
311             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
312           }
313         }
314       }
315     } else {
316       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
317     }
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
325 {
326   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
327   int         ierr,nstash,reallocs;
328   InsertMode  addv;
329 
330   PetscFunctionBegin;
331   if (aij->donotstash) {
332     PetscFunctionReturn(0);
333   }
334 
335   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
336   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
337   if (addv == (ADD_VALUES|INSERT_VALUES)) {
338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
339   }
340   mat->insertmode = addv; /* in case this processor had no cache */
341 
342   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
343   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
344   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
345   PetscFunctionReturn(0);
346 }
347 
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if the nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled, so we can skip this step
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 #if defined(PETSC_HAVE_SUPERLUDIST)
408   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
409   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
410 #endif
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 int MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
419   int        ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
433   int            *procs,*nprocs,j,idx,nsends,*work,row;
434   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
435   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
436   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
437   MPI_Comm       comm = A->comm;
438   MPI_Request    *send_waits,*recv_waits;
439   MPI_Status     recv_status,*send_status;
440   IS             istmp;
441   PetscTruth     found;
442 
443   PetscFunctionBegin;
444   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
445   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
446 
447   /*  first count number of contributors to each processor */
448   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
449   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
450   procs  = nprocs + size;
451   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
466   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
467   nrecvs = work[size+rank];
468   nmax   = work[rank];
469   ierr   = PetscFree(work);CHKERRQ(ierr);
470 
471   /* post receives:   */
472   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
474   for (i=0; i<nrecvs; i++) {
475     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for the values destined
480          for the ith processor
481   */
482   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
484   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
485   starts[0] = 0;
486   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
487   for (i=0; i<N; i++) {
488     svalues[starts[owner[i]]++] = rows[i];
489   }
490   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
491 
492   starts[0] = 0;
493   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
494   count = 0;
495   for (i=0; i<size; i++) {
496     if (procs[i]) {
497       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
498     }
499   }
500   ierr = PetscFree(starts);CHKERRQ(ierr);
501 
502   base = owners[rank];
503 
504   /*  wait on receives */
505   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
506   source = lens + nrecvs;
507   count  = nrecvs; slen = 0;
508   while (count) {
509     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
510     /* unpack receives into our local space */
511     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
512     source[imdex]  = recv_status.MPI_SOURCE;
513     lens[imdex]    = n;
514     slen          += n;
515     count--;
516   }
517   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
518 
519   /* move the data into the send scatter */
520   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
521   count = 0;
522   for (i=0; i<nrecvs; i++) {
523     values = rvalues + i*nmax;
524     for (j=0; j<lens[i]; j++) {
525       lrows[count++] = values[j] - base;
526     }
527   }
528   ierr = PetscFree(rvalues);CHKERRQ(ierr);
529   ierr = PetscFree(lens);CHKERRQ(ierr);
530   ierr = PetscFree(owner);CHKERRQ(ierr);
531   ierr = PetscFree(nprocs);CHKERRQ(ierr);
532 
533   /* actually zap the local rows */
534   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
535   PetscLogObjectParent(A,istmp);
536 
537   /*
538         Zero the required rows. If the "diagonal block" of the matrix
539      is square and the user wishes to set the diagonal we use separate
540      code so that MatSetValues() is not called for each diagonal entry,
541      which would allocate new memory and incur many mallocs, slowing things down.
542 
543        Contributed by: Matthew Knepley
544   */
545   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
546   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
547   if (diag && (l->A->M == l->A->N)) {
548     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
549   } else if (diag) {
550     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
551     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
552       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
554     }
555     for (i = 0; i < slen; i++) {
556       row  = lrows[i] + rstart;
557       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
558     }
559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
561   } else {
562     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
563   }
564   ierr = ISDestroy(istmp);CHKERRQ(ierr);
565   ierr = PetscFree(lrows);CHKERRQ(ierr);
566 
567   /* wait on sends */
568   if (nsends) {
569     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
570     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
571     ierr = PetscFree(send_status);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(send_waits);CHKERRQ(ierr);
574   ierr = PetscFree(svalues);CHKERRQ(ierr);
575 
576   PetscFunctionReturn(0);
577 }
578 
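/*
   Illustrative usage sketch (not compiled); the helper name is hypothetical.
   Each process lists the global rows it wants zeroed; MatZeroRows_MPIAIJ()
   above routes every row to the process that owns it and optionally places
   diag on the diagonal.
*/
#if 0
static int ExampleZeroRows(Mat A,int nrows,int *globalrows)
{
  IS          is;
  int         ierr;
  PetscScalar diag = 1.0;

  PetscFunctionBegin;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,globalrows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr); /* pass PETSC_NULL instead of &diag to leave the rows fully zeroed */
  ierr = ISDestroy(is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
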
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_MPIAIJ"
581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
584   int        ierr,nt;
585 
586   PetscFunctionBegin;
587   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
588   if (nt != A->n) {
589     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
590   }
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
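/*
   Illustrative usage sketch (not compiled); the helper name is hypothetical and
   VecCreateMPI()/VecSet() are assumed to follow the usual PETSc calling
   sequence.  The local size of xx must match the column partition of A and the
   local size of yy must match the row partition, since MatMult_MPIAIJ() above
   scatters the ghost values of xx into a->lvec while the diagonal block is
   being applied.
*/
#if 0
static int ExampleMatMult(Mat A)
{
  Vec         x,y;
  int         ierr,m,n;
  PetscScalar one = 1.0;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,n,PETSC_DECIDE,&x);CHKERRQ(ierr); /* layout matches the columns of A */
  ierr = VecCreateMPI(A->comm,m,PETSC_DECIDE,&y);CHKERRQ(ierr); /* layout matches the rows of A */
  ierr = VecSet(&one,x);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
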
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultAdd_MPIAIJ"
600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
608   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
609   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
618   int        ierr;
619 
620   PetscFunctionBegin;
621   /* do nondiagonal part */
622   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
623   /* send it on its way */
624   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
625   /* do local part */
626   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
627   /* receive remote parts: note this assumes the values are not actually
628      inserted into yy until the VecScatterEnd() below, which holds for the
629      current VecScatter implementation but may not always be true. */
630   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
639   int        ierr;
640 
641   PetscFunctionBegin;
642   /* do nondiagonal part */
643   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
644   /* send it on its way */
645   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
646   /* do local part */
647   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
648   /* receive remote parts: note this assumes the values are not actually
649      inserted into zz until the VecScatterEnd() below, which holds for the
650      current VecScatter implementation but may not always be true. */
651   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*
656   This only works correctly for square matrices where the subblock A->A is the
657    diagonal block
658 */
659 #undef __FUNCT__
660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
662 {
663   int        ierr;
664   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
665 
666   PetscFunctionBegin;
667   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
668   if (a->rstart != a->cstart || a->rend != a->cend) {
669     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
670   }
671   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatScale_MPIAIJ"
677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
678 {
679   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
680   int        ierr;
681 
682   PetscFunctionBegin;
683   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
684   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatDestroy_MPIAIJ"
690 int MatDestroy_MPIAIJ(Mat mat)
691 {
692   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696 #if defined(PETSC_USE_LOG)
697   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
698 #endif
699   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
700   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
703 #if defined (PETSC_USE_CTABLE)
704   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
705 #else
706   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
707 #endif
708   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
709   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
710   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
711   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
712   ierr = PetscFree(aij);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
720 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
721 {
722   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
723   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
724   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
725   PetscTruth        isdraw,isascii,flg;
726   PetscViewer       sviewer;
727   PetscViewerFormat format;
728 
729   PetscFunctionBegin;
730   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
732   if (isascii) {
733     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
734     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
735       MatInfo info;
736       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
737       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
738       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
739       if (flg) {
740         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
741 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
742       } else {
743         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
744 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
745       }
746       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
748       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
749       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
750       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
751       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
752       PetscFunctionReturn(0);
753     } else if (format == PETSC_VIEWER_ASCII_INFO) {
754       PetscFunctionReturn(0);
755     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
756 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
757       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
758 #endif
759       PetscFunctionReturn(0);
760     }
761   } else if (isdraw) {
762     PetscDraw       draw;
763     PetscTruth isnull;
764     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
765     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
766   }
767 
768   if (size == 1) {
769     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
770     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
771   } else {
772     /* assemble the entire matrix onto first processor. */
773     Mat         A;
774     Mat_SeqAIJ *Aloc;
775     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
776     PetscScalar *a;
777 
778     if (!rank) {
779       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
780     } else {
781       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
782     }
783     PetscLogObjectParent(mat,A);
784 
785     /* copy over the A part */
786     Aloc = (Mat_SeqAIJ*)aij->A->data;
787     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
788     row = aij->rstart;
789     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
790     for (i=0; i<m; i++) {
791       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
792       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
793     }
794     aj = Aloc->j;
795     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
796 
797     /* copy over the B part */
798     Aloc = (Mat_SeqAIJ*)aij->B->data;
799     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
800     row  = aij->rstart;
801     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
802     ct   = cols;
803     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
804     for (i=0; i<m; i++) {
805       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
806       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
807     }
808     ierr = PetscFree(ct);CHKERRQ(ierr);
809     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
810     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
811     /*
812        Every process must participate in drawing the matrix since the graphics
813        waits are synchronized across all processors that share the PetscDraw object
814     */
815     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
816     if (!rank) {
817       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
818       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
819     }
820     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
821     ierr = MatDestroy(A);CHKERRQ(ierr);
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 #undef __FUNCT__
827 #define __FUNCT__ "MatView_MPIAIJ"
828 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
829 {
830   int        ierr;
831   PetscTruth isascii,isdraw,issocket,isbinary;
832 
833   PetscFunctionBegin;
834   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
835   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
836   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
837   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
838   if (isascii || isdraw || isbinary || issocket) {
839     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
840   } else {
841     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
842   }
843   PetscFunctionReturn(0);
844 }
845 
846 
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatRelax_MPIAIJ"
850 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
851 {
852   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
853   int          ierr;
854   Vec          bb1;
855   PetscScalar  mone=-1.0;
856 
857   PetscFunctionBegin;
858   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
859 
860   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
861 
862   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
863     if (flag & SOR_ZERO_INITIAL_GUESS) {
864       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
865       its--;
866     }
867 
868     while (its--) {
869       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
870       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
871 
872       /* update rhs: bb1 = bb - B*x */
873       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
874       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
875 
876       /* local sweep */
877       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
878       CHKERRQ(ierr);
879     }
880   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
881     if (flag & SOR_ZERO_INITIAL_GUESS) {
882       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
883       its--;
884     }
885     while (its--) {
886       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
887       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
888 
889       /* update rhs: bb1 = bb - B*x */
890       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
891       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
892 
893       /* local sweep */
894       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
895       CHKERRQ(ierr);
896     }
897   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
898     if (flag & SOR_ZERO_INITIAL_GUESS) {
899       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
900       its--;
901     }
902     while (its--) {
903       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
904       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
905 
906       /* update rhs: bb1 = bb - B*x */
907       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
908       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
909 
910       /* local sweep */
911       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
912       CHKERRQ(ierr);
913     }
914   } else {
915     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
916   }
917 
918   ierr = VecDestroy(bb1);CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
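/*
   Illustrative usage sketch (not compiled); the helper name is hypothetical and
   MatRelax() is assumed to be the public entry point that dispatches to
   MatRelax_MPIAIJ() above.  Only processor-local sweeps are supported, i.e.
   block Jacobi with local SOR/SSOR, so one of the SOR_LOCAL_* flags must be
   requested.
*/
#if 0
static int ExampleLocalSSOR(Mat A,Vec b,Vec x)
{
  int ierr;

  PetscFunctionBegin;
  /* one global iteration, one local symmetric sweep, omega = 1.0, zero initial guess */
  ierr = MatRelax(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP|SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
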
922 #undef __FUNCT__
923 #define __FUNCT__ "MatGetInfo_MPIAIJ"
924 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
925 {
926   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
927   Mat        A = mat->A,B = mat->B;
928   int        ierr;
929   PetscReal  isend[5],irecv[5];
930 
931   PetscFunctionBegin;
932   info->block_size     = 1.0;
933   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
934   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
935   isend[3] = info->memory;  isend[4] = info->mallocs;
936   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
937   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
938   isend[3] += info->memory;  isend[4] += info->mallocs;
939   if (flag == MAT_LOCAL) {
940     info->nz_used      = isend[0];
941     info->nz_allocated = isend[1];
942     info->nz_unneeded  = isend[2];
943     info->memory       = isend[3];
944     info->mallocs      = isend[4];
945   } else if (flag == MAT_GLOBAL_MAX) {
946     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
947     info->nz_used      = irecv[0];
948     info->nz_allocated = irecv[1];
949     info->nz_unneeded  = irecv[2];
950     info->memory       = irecv[3];
951     info->mallocs      = irecv[4];
952   } else if (flag == MAT_GLOBAL_SUM) {
953     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
954     info->nz_used      = irecv[0];
955     info->nz_allocated = irecv[1];
956     info->nz_unneeded  = irecv[2];
957     info->memory       = irecv[3];
958     info->mallocs      = irecv[4];
959   }
960   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
961   info->fill_ratio_needed = 0;
962   info->factor_mallocs    = 0;
963   info->rows_global       = (double)matin->M;
964   info->columns_global    = (double)matin->N;
965   info->rows_local        = (double)matin->m;
966   info->columns_local     = (double)matin->N;
967 
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatSetOption_MPIAIJ"
973 int MatSetOption_MPIAIJ(Mat A,MatOption op)
974 {
975   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
976   int        ierr;
977 
978   PetscFunctionBegin;
979   switch (op) {
980   case MAT_NO_NEW_NONZERO_LOCATIONS:
981   case MAT_YES_NEW_NONZERO_LOCATIONS:
982   case MAT_COLUMNS_UNSORTED:
983   case MAT_COLUMNS_SORTED:
984   case MAT_NEW_NONZERO_ALLOCATION_ERR:
985   case MAT_KEEP_ZEROED_ROWS:
986   case MAT_NEW_NONZERO_LOCATION_ERR:
987   case MAT_USE_INODES:
988   case MAT_DO_NOT_USE_INODES:
989   case MAT_IGNORE_ZERO_ENTRIES:
990     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
991     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
992     break;
993   case MAT_ROW_ORIENTED:
994     a->roworiented = PETSC_TRUE;
995     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
996     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
997     break;
998   case MAT_ROWS_SORTED:
999   case MAT_ROWS_UNSORTED:
1000   case MAT_YES_NEW_DIAGONALS:
1001   case MAT_USE_SINGLE_PRECISION_SOLVES:
1002     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1003     break;
1004   case MAT_COLUMN_ORIENTED:
1005     a->roworiented = PETSC_FALSE;
1006     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1007     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1008     break;
1009   case MAT_IGNORE_OFF_PROC_ENTRIES:
1010     a->donotstash = PETSC_TRUE;
1011     break;
1012   case MAT_NO_NEW_DIAGONALS:
1013     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1014   default:
1015     SETERRQ(PETSC_ERR_SUP,"unknown option");
1016   }
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatGetRow_MPIAIJ"
1022 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1023 {
1024   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1025   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1026   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1027   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1028   int          *cmap,*idx_p;
1029 
1030   PetscFunctionBegin;
1031   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1032   mat->getrowactive = PETSC_TRUE;
1033 
1034   if (!mat->rowvalues && (idx || v)) {
1035     /*
1036         allocate enough space to hold information from the longest row.
1037     */
1038     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1039     int     max = 1,tmp;
1040     for (i=0; i<matin->m; i++) {
1041       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1042       if (max < tmp) { max = tmp; }
1043     }
1044     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1045     mat->rowindices = (int*)(mat->rowvalues + max);
1046   }
1047 
1048   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1049   lrow = row - rstart;
1050 
1051   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1052   if (!v)   {pvA = 0; pvB = 0;}
1053   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1054   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1055   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1056   nztot = nzA + nzB;
1057 
1058   cmap  = mat->garray;
1059   if (v  || idx) {
1060     if (nztot) {
1061       /* Sort by increasing column numbers, assuming A and B already sorted */
1062       int imark = -1;
1063       if (v) {
1064         *v = v_p = mat->rowvalues;
1065         for (i=0; i<nzB; i++) {
1066           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1067           else break;
1068         }
1069         imark = i;
1070         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1071         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1072       }
1073       if (idx) {
1074         *idx = idx_p = mat->rowindices;
1075         if (imark > -1) {
1076           for (i=0; i<imark; i++) {
1077             idx_p[i] = cmap[cworkB[i]];
1078           }
1079         } else {
1080           for (i=0; i<nzB; i++) {
1081             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1082             else break;
1083           }
1084           imark = i;
1085         }
1086         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1087         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1088       }
1089     } else {
1090       if (idx) *idx = 0;
1091       if (v)   *v   = 0;
1092     }
1093   }
1094   *nz = nztot;
1095   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1096   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
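/*
   Illustrative usage sketch (not compiled); the helper name is hypothetical.
   Rows must be requested one at a time (see the getrowactive check above) and
   only locally owned rows may be accessed; MatGetOwnershipRange() gives the
   [rstart,rend) range for this process.
*/
#if 0
static int ExampleRowSum(Mat A,PetscScalar *sum)
{
  int         ierr,i,j,rstart,rend,ncols,*cols;
  PetscScalar *vals;

  PetscFunctionBegin;
  *sum = 0.0;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatGetRow(A,i,&ncols,&cols,&vals);CHKERRQ(ierr);
    for (j=0; j<ncols; j++) *sum += vals[j];
    ierr = MatRestoreRow(A,i,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
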
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1102 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1103 {
1104   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1105 
1106   PetscFunctionBegin;
1107   if (aij->getrowactive == PETSC_FALSE) {
1108     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1109   }
1110   aij->getrowactive = PETSC_FALSE;
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatNorm_MPIAIJ"
1116 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1117 {
1118   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1119   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1120   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1121   PetscReal    sum = 0.0;
1122   PetscScalar  *v;
1123 
1124   PetscFunctionBegin;
1125   if (aij->size == 1) {
1126     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1127   } else {
1128     if (type == NORM_FROBENIUS) {
1129       v = amat->a;
1130       for (i=0; i<amat->nz; i++) {
1131 #if defined(PETSC_USE_COMPLEX)
1132         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1133 #else
1134         sum += (*v)*(*v); v++;
1135 #endif
1136       }
1137       v = bmat->a;
1138       for (i=0; i<bmat->nz; i++) {
1139 #if defined(PETSC_USE_COMPLEX)
1140         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1141 #else
1142         sum += (*v)*(*v); v++;
1143 #endif
1144       }
1145       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1146       *norm = sqrt(*norm);
1147     } else if (type == NORM_1) { /* max column norm */
1148       PetscReal *tmp,*tmp2;
1149       int    *jj,*garray = aij->garray;
1150       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1151       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1152       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1153       *norm = 0.0;
1154       v = amat->a; jj = amat->j;
1155       for (j=0; j<amat->nz; j++) {
1156         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1157       }
1158       v = bmat->a; jj = bmat->j;
1159       for (j=0; j<bmat->nz; j++) {
1160         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1161       }
1162       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1163       for (j=0; j<mat->N; j++) {
1164         if (tmp2[j] > *norm) *norm = tmp2[j];
1165       }
1166       ierr = PetscFree(tmp);CHKERRQ(ierr);
1167       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1168     } else if (type == NORM_INFINITY) { /* max row norm */
1169       PetscReal ntemp = 0.0;
1170       for (j=0; j<aij->A->m; j++) {
1171         v = amat->a + amat->i[j] + shift;
1172         sum = 0.0;
1173         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1174           sum += PetscAbsScalar(*v); v++;
1175         }
1176         v = bmat->a + bmat->i[j] + shift;
1177         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1178           sum += PetscAbsScalar(*v); v++;
1179         }
1180         if (sum > ntemp) ntemp = sum;
1181       }
1182       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1183     } else {
1184       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1185     }
1186   }
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatTranspose_MPIAIJ"
1192 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1193 {
1194   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1195   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1196   int          ierr,shift = Aloc->indexshift;
1197   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1198   Mat          B;
1199   PetscScalar  *array;
1200 
1201   PetscFunctionBegin;
1202   if (!matout && M != N) {
1203     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1204   }
1205 
1206   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1207 
1208   /* copy over the A part */
1209   Aloc = (Mat_SeqAIJ*)a->A->data;
1210   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1211   row = a->rstart;
1212   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1213   for (i=0; i<m; i++) {
1214     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1215     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1216   }
1217   aj = Aloc->j;
1218   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1219 
1220   /* copy over the B part */
1221   Aloc = (Mat_SeqAIJ*)a->B->data;
1222   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1223   row  = a->rstart;
1224   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1225   ct   = cols;
1226   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1227   for (i=0; i<m; i++) {
1228     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1229     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1230   }
1231   ierr = PetscFree(ct);CHKERRQ(ierr);
1232   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1233   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1234   if (matout) {
1235     *matout = B;
1236   } else {
1237     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1244 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1245 {
1246   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1247   Mat        a = aij->A,b = aij->B;
1248   int        ierr,s1,s2,s3;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1252   if (rr) {
1253     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1254     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1255     /* Overlap communication with computation. */
1256     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1257   }
1258   if (ll) {
1259     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1260     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1261     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1262   }
1263   /* scale  the diagonal block */
1264   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1265 
1266   if (rr) {
1267     /* Do a scatter end and then right scale the off-diagonal block */
1268     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1269     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1270   }
1271 
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1278 int MatPrintHelp_MPIAIJ(Mat A)
1279 {
1280   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1281   int        ierr;
1282 
1283   PetscFunctionBegin;
1284   if (!a->rank) {
1285     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1286   }
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1292 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1293 {
1294   PetscFunctionBegin;
1295   *bs = 1;
1296   PetscFunctionReturn(0);
1297 }
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1300 int MatSetUnfactored_MPIAIJ(Mat A)
1301 {
1302   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1303   int        ierr;
1304 
1305   PetscFunctionBegin;
1306   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatEqual_MPIAIJ"
1312 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1313 {
1314   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1315   Mat        a,b,c,d;
1316   PetscTruth flg;
1317   int        ierr;
1318 
1319   PetscFunctionBegin;
1320   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1321   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1322   a = matA->A; b = matA->B;
1323   c = matB->A; d = matB->B;
1324 
1325   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1326   if (flg == PETSC_TRUE) {
1327     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1328   }
1329   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatCopy_MPIAIJ"
1335 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1336 {
1337   int        ierr;
1338   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1339   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1340   PetscTruth flg;
1341 
1342   PetscFunctionBegin;
1343   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1344   if (str != SAME_NONZERO_PATTERN || !flg) {
1345     /* because of the column compression in the off-processor part of the matrix a->B,
1346        the number of columns in a->B and b->B may be different, hence we cannot call
1347        MatCopy() directly on the two parts. If need be, we could provide a more
1348        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1349        and then copying the submatrices */
1350     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1351   } else {
1352     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1353     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1354   }
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1360 int MatSetUpPreallocation_MPIAIJ(Mat A)
1361 {
1362   int        ierr;
1363 
1364   PetscFunctionBegin;
1365   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1370 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1371 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1372 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1373 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1374 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1375 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1376 #endif
1377 
1378 #include "petscblaslapack.h"
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatAXPY_MPIAIJ"
1382 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1383 {
1384   int        ierr,one = 1;
1385   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1386   Mat_SeqAIJ *x,*y;
1387 
1388   PetscFunctionBegin;
1389   if (str == SAME_NONZERO_PATTERN) {
1390     x  = (Mat_SeqAIJ *)xx->A->data;
1391     y  = (Mat_SeqAIJ *)yy->A->data;
1392     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1393     x  = (Mat_SeqAIJ *)xx->B->data;
1394     y  = (Mat_SeqAIJ *)yy->B->data;
1395     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1396   } else {
1397     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 /* -------------------------------------------------------------------*/
1403 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1404        MatGetRow_MPIAIJ,
1405        MatRestoreRow_MPIAIJ,
1406        MatMult_MPIAIJ,
1407        MatMultAdd_MPIAIJ,
1408        MatMultTranspose_MPIAIJ,
1409        MatMultTransposeAdd_MPIAIJ,
1410        0,
1411        0,
1412        0,
1413        0,
1414        0,
1415        0,
1416        MatRelax_MPIAIJ,
1417        MatTranspose_MPIAIJ,
1418        MatGetInfo_MPIAIJ,
1419        MatEqual_MPIAIJ,
1420        MatGetDiagonal_MPIAIJ,
1421        MatDiagonalScale_MPIAIJ,
1422        MatNorm_MPIAIJ,
1423        MatAssemblyBegin_MPIAIJ,
1424        MatAssemblyEnd_MPIAIJ,
1425        0,
1426        MatSetOption_MPIAIJ,
1427        MatZeroEntries_MPIAIJ,
1428        MatZeroRows_MPIAIJ,
1429 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1430        MatLUFactorSymbolic_MPIAIJ_TFS,
1431 #else
1432        0,
1433 #endif
1434        0,
1435        0,
1436        0,
1437        MatSetUpPreallocation_MPIAIJ,
1438        0,
1439        0,
1440        0,
1441        0,
1442        MatDuplicate_MPIAIJ,
1443        0,
1444        0,
1445        0,
1446        0,
1447        MatAXPY_MPIAIJ,
1448        MatGetSubMatrices_MPIAIJ,
1449        MatIncreaseOverlap_MPIAIJ,
1450        MatGetValues_MPIAIJ,
1451        MatCopy_MPIAIJ,
1452        MatPrintHelp_MPIAIJ,
1453        MatScale_MPIAIJ,
1454        0,
1455        0,
1456        0,
1457        MatGetBlockSize_MPIAIJ,
1458        0,
1459        0,
1460        0,
1461        0,
1462        MatFDColoringCreate_MPIAIJ,
1463        0,
1464        MatSetUnfactored_MPIAIJ,
1465        0,
1466        0,
1467        MatGetSubMatrix_MPIAIJ,
1468        MatDestroy_MPIAIJ,
1469        MatView_MPIAIJ,
1470        MatGetPetscMaps_Petsc,
1471        0,
1472        0,
1473        0,
1474        0,
1475        0,
1476        0,
1477        0,
1478        0,
1479        MatSetColoring_MPIAIJ,
1480        MatSetValuesAdic_MPIAIJ,
1481        MatSetValuesAdifor_MPIAIJ
1482 };
1483 
1484 /* ----------------------------------------------------------------------------------------*/
1485 
1486 EXTERN_C_BEGIN
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1489 int MatStoreValues_MPIAIJ(Mat mat)
1490 {
1491   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1492   int        ierr;
1493 
1494   PetscFunctionBegin;
1495   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1496   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 EXTERN_C_END
1500 
1501 EXTERN_C_BEGIN
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1504 int MatRetrieveValues_MPIAIJ(Mat mat)
1505 {
1506   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1507   int        ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1511   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 EXTERN_C_END
1515 
1516 #include "petscpc.h"
1517 EXTERN_C_BEGIN
1518 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1519 EXTERN_C_END
1520 
1521 EXTERN_C_BEGIN
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatCreate_MPIAIJ"
1524 int MatCreate_MPIAIJ(Mat B)
1525 {
1526   Mat_MPIAIJ *b;
1527   int        ierr,i,size;
1528 #if defined(PETSC_HAVE_SUPERLUDIST)
1529   PetscTruth flg;
1530 #endif
1531 
1532 
1533   PetscFunctionBegin;
1534   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1535 
1536   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1537   B->data         = (void*)b;
1538   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1539   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1540   B->factor       = 0;
1541   B->assembled    = PETSC_FALSE;
1542   B->mapping      = 0;
1543 
1544   B->insertmode      = NOT_SET_VALUES;
1545   b->size            = size;
1546   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1547 
1548   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1549   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1550 
1551   /* the information in the maps duplicates the information computed below, eventually
1552      we should remove the duplicate information that is not contained in the maps */
1553   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1554   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1555 
1556   /* build local table of row and column ownerships */
1557   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1558   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1559   b->cowners = b->rowners + b->size + 2;
1560   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1561   b->rowners[0] = 0;
1562   for (i=2; i<=b->size; i++) {
1563     b->rowners[i] += b->rowners[i-1];
1564   }
1565   b->rstart = b->rowners[b->rank];
1566   b->rend   = b->rowners[b->rank+1];
1567   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1568   b->cowners[0] = 0;
1569   for (i=2; i<=b->size; i++) {
1570     b->cowners[i] += b->cowners[i-1];
1571   }
1572   b->cstart = b->cowners[b->rank];
1573   b->cend   = b->cowners[b->rank+1];
1574 
1575   /* build cache for off array entries formed */
1576   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1577   b->donotstash  = PETSC_FALSE;
1578   b->colmap      = 0;
1579   b->garray      = 0;
1580   b->roworiented = PETSC_TRUE;
1581 
1582   /* stuff used for matrix vector multiply */
1583   b->lvec      = PETSC_NULL;
1584   b->Mvctx     = PETSC_NULL;
1585 
1586   /* stuff for MatGetRow() */
1587   b->rowindices   = 0;
1588   b->rowvalues    = 0;
1589   b->getrowactive = PETSC_FALSE;
1590 
1591 #if defined(PETSC_HAVE_SUPERLUDIST)
1592   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
1593   if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
1594 #endif
1595 
1596   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1597                                      "MatStoreValues_MPIAIJ",
1598                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1600                                      "MatRetrieveValues_MPIAIJ",
1601                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1603                                      "MatGetDiagonalBlock_MPIAIJ",
1604                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1605 
1606   PetscFunctionReturn(0);
1607 }
1608 EXTERN_C_END
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1612 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1613 {
1614   Mat        mat;
1615   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1616   int        ierr;
1617 
1618   PetscFunctionBegin;
1619   *newmat       = 0;
1620   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1621   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1622   a    = (Mat_MPIAIJ*)mat->data;
1623   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1624   mat->factor       = matin->factor;
1625   mat->assembled    = PETSC_TRUE;
1626   mat->insertmode   = NOT_SET_VALUES;
1627   mat->preallocated = PETSC_TRUE;
1628 
1629   a->rstart       = oldmat->rstart;
1630   a->rend         = oldmat->rend;
1631   a->cstart       = oldmat->cstart;
1632   a->cend         = oldmat->cend;
1633   a->size         = oldmat->size;
1634   a->rank         = oldmat->rank;
1635   a->donotstash   = oldmat->donotstash;
1636   a->roworiented  = oldmat->roworiented;
1637   a->rowindices   = 0;
1638   a->rowvalues    = 0;
1639   a->getrowactive = PETSC_FALSE;
1640 
1641   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1642   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1643   if (oldmat->colmap) {
1644 #if defined (PETSC_USE_CTABLE)
1645     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1646 #else
1647     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1648     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1649     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1650 #endif
1651   } else a->colmap = 0;
1652   if (oldmat->garray) {
1653     int len;
1654     len  = oldmat->B->n;
1655     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1656     PetscLogObjectMemory(mat,len*sizeof(int));
1657     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1658   } else a->garray = 0;
1659 
1660   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1661   PetscLogObjectParent(mat,a->lvec);
1662   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1663   PetscLogObjectParent(mat,a->Mvctx);
1664   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1665   PetscLogObjectParent(mat,a->A);
1666   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1667   PetscLogObjectParent(mat,a->B);
1668   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1669   *newmat = mat;
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #include "petscsys.h"
1674 
1675 EXTERN_C_BEGIN
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatLoad_MPIAIJ"
1678 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1679 {
1680   Mat          A;
1681   PetscScalar  *vals,*svals;
1682   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1683   MPI_Status   status;
1684   int          i,nz,ierr,j,rstart,rend,fd;
1685   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1686   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1687   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1688 
1689   PetscFunctionBegin;
1690   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1691   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1692   if (!rank) {
1693     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1694     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1695     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1696     if (header[3] < 0) {
1697       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1698     }
1699   }
1700 
1701   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1702   M = header[1]; N = header[2];
1703   /* determine ownership of all rows */
1704   m = M/size + ((M % size) > rank);
1705   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1706   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1707   rowners[0] = 0;
1708   for (i=2; i<=size; i++) {
1709     rowners[i] += rowners[i-1];
1710   }
1711   rstart = rowners[rank];
1712   rend   = rowners[rank+1];
1713 
1714   /* distribute row lengths to all processors */
1715   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1716   offlens = ourlens + (rend-rstart);
1717   if (!rank) {
1718     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1719     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1720     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1721     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1722     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1723     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1724   } else {
1725     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1726   }
1727 
1728   if (!rank) {
1729     /* calculate the number of nonzeros on each processor */
1730     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1731     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1732     for (i=0; i<size; i++) {
1733       for (j=rowners[i]; j< rowners[i+1]; j++) {
1734         procsnz[i] += rowlengths[j];
1735       }
1736     }
1737     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1738 
1739     /* determine max buffer needed and allocate it */
1740     maxnz = 0;
1741     for (i=0; i<size; i++) {
1742       maxnz = PetscMax(maxnz,procsnz[i]);
1743     }
1744     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1745 
1746     /* read in my part of the matrix column indices  */
1747     nz   = procsnz[0];
1748     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1749     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1750 
1751     /* read in everyone else's column indices and ship them off */
1752     for (i=1; i<size; i++) {
1753       nz   = procsnz[i];
1754       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1755       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1756     }
1757     ierr = PetscFree(cols);CHKERRQ(ierr);
1758   } else {
1759     /* determine buffer space needed for message */
1760     nz = 0;
1761     for (i=0; i<m; i++) {
1762       nz += ourlens[i];
1763     }
1764     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1765 
1766     /* receive message of column indices */
1767     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1768     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1769     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1770   }
1771 
1772   /* determine column ownership if matrix is not square */
1773   if (N != M) {
1774     n      = N/size + ((N % size) > rank);
1775     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1776     cstart = cend - n;
1777   } else {
1778     cstart = rstart;
1779     cend   = rend;
1780     n      = cend - cstart;
1781   }
1782 
1783   /* loop over local rows, determining number of off diagonal entries */
1784   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1785   jj = 0;
1786   for (i=0; i<m; i++) {
1787     for (j=0; j<ourlens[i]; j++) {
1788       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1789       jj++;
1790     }
1791   }
1792 
1793   /* create our matrix */
1794   for (i=0; i<m; i++) {
1795     ourlens[i] -= offlens[i];
1796   }
1797   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1798   A = *newmat;
1799   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1800   for (i=0; i<m; i++) {
1801     ourlens[i] += offlens[i];
1802   }
1803 
1804   if (!rank) {
1805     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1806 
1807     /* read in my part of the matrix numerical values  */
1808     nz   = procsnz[0];
1809     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1810 
1811     /* insert into matrix */
1812     jj      = rstart;
1813     smycols = mycols;
1814     svals   = vals;
1815     for (i=0; i<m; i++) {
1816       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1817       smycols += ourlens[i];
1818       svals   += ourlens[i];
1819       jj++;
1820     }
1821 
1822     /* read in other processors and ship out */
1823     for (i=1; i<size; i++) {
1824       nz   = procsnz[i];
1825       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1826       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1827     }
1828     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1829   } else {
1830     /* receive numeric values */
1831     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1832 
1833     /* receive message of values */
1834     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1835     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1836     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1837 
1838     /* insert into matrix */
1839     jj      = rstart;
1840     smycols = mycols;
1841     svals   = vals;
1842     for (i=0; i<m; i++) {
1843       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1844       smycols += ourlens[i];
1845       svals   += ourlens[i];
1846       jj++;
1847     }
1848   }
1849   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1850   ierr = PetscFree(vals);CHKERRQ(ierr);
1851   ierr = PetscFree(mycols);CHKERRQ(ierr);
1852   ierr = PetscFree(rowners);CHKERRQ(ierr);
1853 
1854   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1855   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 EXTERN_C_END
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1862 /*
1863     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
1864   on each process, and then the end result formed by concatenating the local
1865   matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1866 */
1867 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1868 {
1869   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1870   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1871   Mat          *local,M,Mreuse;
1872   PetscScalar  *vwork,*aa;
1873   MPI_Comm     comm = mat->comm;
1874   Mat_SeqAIJ   *aij;
1875 
1876 
1877   PetscFunctionBegin;
1878   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1879   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1880 
1881   if (call ==  MAT_REUSE_MATRIX) {
1882     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1883     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1884     local = &Mreuse;
1885     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1886   } else {
1887     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1888     Mreuse = *local;
1889     ierr   = PetscFree(local);CHKERRQ(ierr);
1890   }
1891 
1892   /*
1893       m - number of local rows
1894       n - number of columns (same on all processors)
1895       rstart - first row in new global matrix generated
1896   */
1897   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1898   if (call == MAT_INITIAL_MATRIX) {
1899     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1900     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1901     ii  = aij->i;
1902     jj  = aij->j;
1903 
1904     /*
1905         Determine the number of non-zeros in the diagonal and off-diagonal
1906         portions of the matrix in order to do correct preallocation
1907     */
1908 
1909     /* first get start and end of "diagonal" columns */
1910     if (csize == PETSC_DECIDE) {
1911       nlocal = n/size + ((n % size) > rank);
1912     } else {
1913       nlocal = csize;
1914     }
1915     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1916     rstart = rend - nlocal;
1917     if (rank == size - 1 && rend != n) {
1918       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1919     }
1920 
1921     /* next, compute all the lengths */
1922     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1923     olens = dlens + m;
1924     for (i=0; i<m; i++) {
1925       jend = ii[i+1] - ii[i];
1926       olen = 0;
1927       dlen = 0;
1928       for (j=0; j<jend; j++) {
1929         if (*jj < rstart || *jj >= rend) olen++;
1930         else dlen++;
1931         jj++;
1932       }
1933       olens[i] = olen;
1934       dlens[i] = dlen;
1935     }
1936     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1937     ierr = PetscFree(dlens);CHKERRQ(ierr);
1938   } else {
1939     int ml,nl;
1940 
1941     M = *newmat;
1942     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1943     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1944     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1945     /*
1946          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1947        rather than the slower MatSetValues().
1948     */
1949     M->was_assembled = PETSC_TRUE;
1950     M->assembled     = PETSC_FALSE;
1951   }
1952   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1953   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1954   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1955   ii  = aij->i;
1956   jj  = aij->j;
1957   aa  = aij->a;
1958   for (i=0; i<m; i++) {
1959     row   = rstart + i;
1960     nz    = ii[i+1] - ii[i];
1961     cwork = jj;     jj += nz;
1962     vwork = aa;     aa += nz;
1963     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1964   }
1965 
1966   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1967   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968   *newmat = M;
1969 
1970   /* save submatrix used in processor for next request */
1971   if (call ==  MAT_INITIAL_MATRIX) {
1972     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1973     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1974   }
1975 
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1981 /*@C
1982    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1983    (the default parallel PETSc format).  For good matrix assembly performance
1984    the user should preallocate the matrix storage by setting the parameters
1985    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1986    performance can be increased by more than a factor of 50.
1987 
1988    Collective on MPI_Comm
1989 
1990    Input Parameters:
1991 +  A - the matrix
1992 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1993            (same value is used for all local rows)
1994 .  d_nnz - array containing the number of nonzeros in the various rows of the
1995            DIAGONAL portion of the local submatrix (possibly different for each row)
1996            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1997            The size of this array is equal to the number of local rows, i.e 'm'.
1998            You must leave room for the diagonal entry even if it is zero.
1999 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2000            submatrix (same value is used for all local rows).
2001 -  o_nnz - array containing the number of nonzeros in the various rows of the
2002            OFF-DIAGONAL portion of the local submatrix (possibly different for
2003            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2004            structure. The size of this array is equal to the number
2005            of local rows, i.e., 'm'.
2006 
2007    The AIJ format (also called the Yale sparse matrix format or
2008    compressed row storage), is fully compatible with standard Fortran 77
2009    storage.  That is, the stored row and column indices can begin at
2010    either one (as in Fortran) or zero.  See the users manual for details.
2011 
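   For illustration, using the same i (row offsets), j (column indices), and a
   (values) names as the SeqAIJ data structure referenced elsewhere in this file,
   the 3x3 matrix
.vb
      1 0 2
      0 3 0
      4 0 5
.ve
   stored with 0-based indices has i = {0,2,3,5}, j = {0,2,1,0,2} and
   a = {1,2,3,4,5}.
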
2012    The user MUST specify either the local or global matrix dimensions
2013    (possibly both).
2014 
2015    The parallel matrix is partitioned such that the first m0 rows belong to
2016    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2017    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2018 
2019    The DIAGONAL portion of the local submatrix of a processor can be defined
2020    as the submatrix obtained by extracting the part corresponding
2021    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2022    first row that belongs to the processor, and r2 is the last row belonging
2023    to this processor. This is a square mxm matrix. The remaining portion
2024    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2025 
2026    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2027 
2028    By default, this format uses inodes (identical nodes) when possible.
2029    We search for consecutive rows with the same nonzero structure, thereby
2030    reusing matrix information to achieve increased efficiency.
2031 
2032    Options Database Keys:
2033 +  -mat_aij_no_inode  - Do not use inodes
2034 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2035 -  -mat_aij_oneindex - Internally use indexing starting at 1
2036         rather than 0.  Note that when calling MatSetValues(),
2037         the user still MUST index entries starting at 0!
2038 
2039    Example usage:
2040 
2041    Consider the following 8x8 matrix with 34 nonzero values that is
2042    assembled across 3 processors. Assume that proc0 owns 3 rows,
2043    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2044    as follows:
2045 
2046 .vb
2047             1  2  0  |  0  3  0  |  0  4
2048     Proc0   0  5  6  |  7  0  0  |  8  0
2049             9  0 10  | 11  0  0  | 12  0
2050     -------------------------------------
2051            13  0 14  | 15 16 17  |  0  0
2052     Proc1   0 18  0  | 19 20 21  |  0  0
2053             0  0  0  | 22 23  0  | 24  0
2054     -------------------------------------
2055     Proc2  25 26 27  |  0  0 28  | 29  0
2056            30  0  0  | 31 32 33  |  0 34
2057 .ve
2058 
2059    This can be represented as a collection of submatrices as:
2060 
2061 .vb
2062       A B C
2063       D E F
2064       G H I
2065 .ve
2066 
2067    The submatrices A,B,C are owned by proc0, D,E,F are
2068    owned by proc1, and G,H,I are owned by proc2.
2069 
2070    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2071    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2072    The 'M','N' parameters are 8,8, and have the same values on all procs.
2073 
2074    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2075    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2076    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2077    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2078    part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
2079    matrix and [DF] as another SeqAIJ matrix.
2080 
2081    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2082    allocated for every row of the local DIAGONAL submatrix, and o_nz
2083    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2084    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2085    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2086    In this case, the values of d_nz,o_nz are:
2087 .vb
2088      proc0 : d_nz = 2, o_nz = 2
2089      proc1 : d_nz = 3, o_nz = 2
2090      proc2 : d_nz = 1, o_nz = 4
2091 .ve
2092    We are allocating m*(d_nz+o_nz) storage locations for every process. This
2093    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2094    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2095    34 values.
2096 
2097    When the d_nnz, o_nnz parameters are specified, the storage is specified
2098    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2099    In the above case the values for d_nnz,o_nnz are:
2100 .vb
2101      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2102      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2103      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2104 .ve
2105    Here the space allocated is the sum of all the above values, i.e., 34, and
2106    hence the preallocation is perfect.
2107 
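   A minimal call sequence for proc0 in the example above might look as follows
   (an illustrative sketch only; comm and the analogous d_nnz/o_nnz arrays on the
   other processes are assumed to be set up by the caller):
.vb
      Mat A;
      int ierr,d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* per-row counts on proc0 */
      ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
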
2108    Level: intermediate
2109 
2110 .keywords: matrix, aij, compressed row, sparse, parallel
2111 
2112 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2113 @*/
2114 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2115 {
2116   Mat_MPIAIJ   *b;
2117   int          ierr,i;
2118   PetscTruth   flg2;
2119 
2120   PetscFunctionBegin;
2121   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2122   if (!flg2) PetscFunctionReturn(0);
2123   B->preallocated = PETSC_TRUE;
2124   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2125   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2126   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2127   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2128   if (d_nnz) {
2129     for (i=0; i<B->m; i++) {
2130       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2131     }
2132   }
2133   if (o_nnz) {
2134     for (i=0; i<B->m; i++) {
2135       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2136     }
2137   }
2138   b = (Mat_MPIAIJ*)B->data;
2139 
2140   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2141   PetscLogObjectParent(B,b->A);
2142   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2143   PetscLogObjectParent(B,b->B);
2144 
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatCreateMPIAIJ"
2150 /*@C
2151    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2152    (the default parallel PETSc format).  For good matrix assembly performance
2153    the user should preallocate the matrix storage by setting the parameters
2154    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2155    performance can be increased by more than a factor of 50.
2156 
2157    Collective on MPI_Comm
2158 
2159    Input Parameters:
2160 +  comm - MPI communicator
2161 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2162            This value should be the same as the local size used in creating the
2163            y vector for the matrix-vector product y = Ax.
2164 .  n - This value should be the same as the local size used in creating the
2165        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2166        calculated if N is given). For square matrices n is almost always m.
2167 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2168 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2169 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2170            (same value is used for all local rows)
2171 .  d_nnz - array containing the number of nonzeros in the various rows of the
2172            DIAGONAL portion of the local submatrix (possibly different for each row)
2173            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2174            The size of this array is equal to the number of local rows, i.e 'm'.
2175            You must leave room for the diagonal entry even if it is zero.
2176 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2177            submatrix (same value is used for all local rows).
2178 -  o_nnz - array containing the number of nonzeros in the various rows of the
2179            OFF-DIAGONAL portion of the local submatrix (possibly different for
2180            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2181            structure. The size of this array is equal to the number
2182            of local rows, i.e., 'm'.
2183 
2184    Output Parameter:
2185 .  A - the matrix
2186 
2187    Notes:
2188    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2189    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2190    storage requirements for this matrix.
2191 
2192    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2193    processor then it must be used on all processors that share the object for
2194    that argument.
2195 
2196    The AIJ format (also called the Yale sparse matrix format or
2197    compressed row storage), is fully compatible with standard Fortran 77
2198    storage.  That is, the stored row and column indices can begin at
2199    either one (as in Fortran) or zero.  See the users manual for details.
2200 
2201    The user MUST specify either the local or global matrix dimensions
2202    (possibly both).
2203 
2204    The parallel matrix is partitioned such that the first m0 rows belong to
2205    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2206    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2207 
2208    The DIAGONAL portion of the local submatrix of a processor can be defined
2209    as the submatrix obtained by extracting the part corresponding
2210    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2211    first row that belongs to the processor, and r2 is the last row belonging
2212    to this processor. This is a square mxm matrix. The remaining portion
2213    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2214 
2215    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2216 
2217    By default, this format uses inodes (identical nodes) when possible.
2218    We search for consecutive rows with the same nonzero structure, thereby
2219    reusing matrix information to achieve increased efficiency.
2220 
2221    Options Database Keys:
2222 +  -mat_aij_no_inode  - Do not use inodes
2223 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2224 -  -mat_aij_oneindex - Internally use indexing starting at 1
2225         rather than 0.  Note that when calling MatSetValues(),
2226         the user still MUST index entries starting at 0!
2227 
2228 
2229    Example usage:
2230 
2231    Consider the following 8x8 matrix with 34 nonzero values that is
2232    assembled across 3 processors. Assume that proc0 owns 3 rows,
2233    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2234    as follows:
2235 
2236 .vb
2237             1  2  0  |  0  3  0  |  0  4
2238     Proc0   0  5  6  |  7  0  0  |  8  0
2239             9  0 10  | 11  0  0  | 12  0
2240     -------------------------------------
2241            13  0 14  | 15 16 17  |  0  0
2242     Proc1   0 18  0  | 19 20 21  |  0  0
2243             0  0  0  | 22 23  0  | 24  0
2244     -------------------------------------
2245     Proc2  25 26 27  |  0  0 28  | 29  0
2246            30  0  0  | 31 32 33  |  0 34
2247 .ve
2248 
2249    This can be represented as a collection of submatrices as:
2250 
2251 .vb
2252       A B C
2253       D E F
2254       G H I
2255 .ve
2256 
2257    The submatrices A,B,C are owned by proc0, D,E,F are
2258    owned by proc1, and G,H,I are owned by proc2.
2259 
2260    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2261    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2262    The 'M','N' parameters are 8,8, and have the same values on all procs.
2263 
2264    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2265    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2266    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2267    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2268    part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
2269    matrix and [DF] as another SeqAIJ matrix.
2270 
2271    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2272    allocated for every row of the local DIAGONAL submatrix, and o_nz
2273    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2274    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2275    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2276    In this case, the values of d_nz,o_nz are:
2277 .vb
2278      proc0 : d_nz = 2, o_nz = 2
2279      proc1 : d_nz = 3, o_nz = 2
2280      proc2 : d_nz = 1, o_nz = 4
2281 .ve
2282    We are allocating m*(d_nz+o_nz) storage locations for every process. This
2283    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2284    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2285    34 values.
2286 
2287    When the d_nnz, o_nnz parameters are specified, the storage is specified
2288    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2289    In the above case the values for d_nnz,o_nnz are:
2290 .vb
2291      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2292      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2293      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2294 .ve
2295    Here the space allocated is the sum of all the above values, i.e., 34, and
2296    hence the preallocation is perfect.
2297 
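   On proc0 in the example above the call might look as follows (an illustrative
   sketch only; comm and the analogous d_nnz/o_nnz arrays on the other processes
   are assumed to be set up by the caller):
.vb
      Mat A;
      int ierr,d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* per-row counts on proc0 */
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
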
2298    Level: intermediate
2299 
2300 .keywords: matrix, aij, compressed row, sparse, parallel
2301 
2302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2303 @*/
2304 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2305 {
2306   int ierr,size;
2307 
2308   PetscFunctionBegin;
2309   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2310   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2311   if (size > 1) {
2312     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2313     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2314   } else {
2315     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2316     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2317   }
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2323 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2324 {
2325   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2326   PetscFunctionBegin;
2327   *Ad     = a->A;
2328   *Ao     = a->B;
2329   *colmap = a->garray;
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 #undef __FUNCT__
2334 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2335 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2336 {
2337   int        ierr;
2338   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2339 
2340   PetscFunctionBegin;
2341   if (coloring->ctype == IS_COLORING_LOCAL) {
2342     int        *allcolors,*colors,i;
2343     ISColoring ocoloring;
2344 
2345     /* set coloring for diagonal portion */
2346     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2347 
2348     /* set coloring for off-diagonal portion */
2349     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2350     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2351     for (i=0; i<a->B->n; i++) {
2352       colors[i] = allcolors[a->garray[i]];
2353     }
2354     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2355     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2356     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2357     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2358   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2359     int        *colors,i,*larray;
2360     ISColoring ocoloring;
2361 
2362     /* set coloring for diagonal portion */
2363     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2364     for (i=0; i<a->A->n; i++) {
2365       larray[i] = i + a->cstart;
2366     }
2367     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2368     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2369     for (i=0; i<a->A->n; i++) {
2370       colors[i] = coloring->colors[larray[i]];
2371     }
2372     ierr = PetscFree(larray);CHKERRQ(ierr);
2373     ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2374     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2375     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2376 
2377     /* set coloring for off-diagonal portion */
2378     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2379     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2380     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2381     for (i=0; i<a->B->n; i++) {
2382       colors[i] = coloring->colors[larray[i]];
2383     }
2384     ierr = PetscFree(larray);CHKERRQ(ierr);
2385     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2386     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2387     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2388   } else {
2389     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2390   }
2391 
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2397 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2398 {
2399   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2400   int        ierr;
2401 
2402   PetscFunctionBegin;
2403   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2404   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2410 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2411 {
2412   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2413   int        ierr;
2414 
2415   PetscFunctionBegin;
2416   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2417   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420