xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0e5e90baed20fcf66590e8d3abe9cacb3e2155d0)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
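
/*
   Note on the colmap convention established above: both the key (the global column)
   and the stored local index are shifted by one, so that a lookup result of zero,
   i.e. -1 after the caller decrements it, means "this global column does not appear
   in the off-diagonal part".  A minimal lookup sketch, assuming the colmap has already
   been created (gcol is a global column index, lcol receives the local index):

#if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;                        lcol == -1 means gcol is not in the off-diagonal block
#else
       lcol = aij->colmap[gcol] - 1;  lcol == -1 means gcol is not in the off-diagonal block
#endif
*/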
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; \
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
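
/*
   The two macros above insert a single (row,col,value) entry into the diagonal block
   (the _A_ variant) or the off-diagonal block (the _B_ variant) of the local part of a
   MATMPIAIJ matrix: a short binary search narrows the range within the row, a linear
   scan then either finds an existing entry (which is added to or overwritten) or the
   insertion point; in the latter case the later entries of the row are shifted up to
   make room, with MatSeqXAIJReallocateAIJ() growing the storage when the preallocated
   row is already full (unless the nonew option forbids new nonzeros).
*/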
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIAIJ"
107 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
110   PetscScalar    value;
111   PetscErrorCode ierr;
112   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
113   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
114   PetscTruth     roworiented = aij->roworiented;
115 
116   /* Some variables required by the macros above */
117   Mat            A = aij->A;
118   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
119   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
120   PetscScalar    *aa = a->a;
121   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
122   Mat            B = aij->B;
123   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
124   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
125   PetscScalar    *ba = b->a;
126 
127   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
128   PetscInt       nonew = a->nonew;
129   PetscScalar    *ap1,*ap2;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (im[i] < 0) continue;
134 #if defined(PETSC_USE_DEBUG)
135     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
136 #endif
137     if (im[i] >= rstart && im[i] < rend) {
138       row      = im[i] - rstart;
139       lastcol1 = -1;
140       rp1      = aj + ai[row];
141       ap1      = aa + ai[row];
142       rmax1    = aimax[row];
143       nrow1    = ailen[row];
144       low1     = 0;
145       high1    = nrow1;
146       lastcol2 = -1;
147       rp2      = bj + bi[row];
148       ap2      = ba + bi[row];
149       rmax2    = bimax[row];
150       nrow2    = bilen[row];
151       low2     = 0;
152       high2    = nrow2;
153 
154       for (j=0; j<n; j++) {
155         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
156         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
157         if (in[j] >= cstart && in[j] < cend){
158           col = in[j] - cstart;
159           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
160         } else if (in[j] < 0) continue;
161 #if defined(PETSC_USE_DEBUG)
162         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
163 #endif
164         else {
165           if (mat->was_assembled) {
166             if (!aij->colmap) {
167               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
168             }
169 #if defined (PETSC_USE_CTABLE)
170             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
171             col--;
172 #else
173             col = aij->colmap[in[j]] - 1;
174 #endif
175             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
176               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
177               col =  in[j];
178               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
179               B = aij->B;
180               b = (Mat_SeqAIJ*)B->data;
181               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
182               rp2      = bj + bi[row];
183               ap2      = ba + bi[row];
184               rmax2    = bimax[row];
185               nrow2    = bilen[row];
186               low2     = 0;
187               high2    = nrow2;
188               bm       = aij->B->m;
189               ba = b->a;
190             }
191           } else col = in[j];
192           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
193         }
194       }
195     } else {
196       if (!aij->donotstash) {
197         if (roworiented) {
198           if (ignorezeroentries && v[i*n] == 0.0) continue;
199           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
200         } else {
201           if (ignorezeroentries && v[i] == 0.0) continue;
202           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
203         }
204       }
205     }
206   }
207   PetscFunctionReturn(0);
208 }
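
/*
   Rows owned by this process are written directly into aij->A (columns in
   [cstart,cend)) or aij->B (all remaining columns); rows owned by other processes are
   placed in the matrix stash and communicated during assembly, unless
   MAT_IGNORE_OFF_PROC_ENTRIES was set.  A minimal usage sketch, assuming mat is a
   MATMPIAIJ matrix and row/col are global indices:

       PetscScalar one = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/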
209 
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatGetValues_MPIAIJ"
213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
214 {
215   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
216   PetscErrorCode ierr;
217   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
218   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
219 
220   PetscFunctionBegin;
221   for (i=0; i<m; i++) {
222     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
223     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
224     if (idxm[i] >= rstart && idxm[i] < rend) {
225       row = idxm[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
228         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
229         if (idxn[j] >= cstart && idxn[j] < cend){
230           col = idxn[j] - cstart;
231           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
232         } else {
233           if (!aij->colmap) {
234             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
235           }
236 #if defined (PETSC_USE_CTABLE)
237           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
238           col--;
239 #else
240           col = aij->colmap[idxn[j]] - 1;
241 #endif
242           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
243           else {
244             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
245           }
246         }
247       }
248     } else {
249       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
250     }
251   }
252   PetscFunctionReturn(0);
253 }
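
/*
   Note that MatGetValues_MPIAIJ() can only return entries from rows owned by this
   process; asking for a row outside [rstart,rend) raises PETSC_ERR_SUP.  Off-diagonal
   columns are translated through colmap/garray, and any requested entry that is not in
   the local nonzero structure is returned as 0.0.
*/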
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
260   PetscErrorCode ierr;
261   PetscInt       nstash,reallocs;
262   InsertMode     addv;
263 
264   PetscFunctionBegin;
265   if (aij->donotstash) {
266     PetscFunctionReturn(0);
267   }
268 
269   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
270   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
271   if (addv == (ADD_VALUES|INSERT_VALUES)) {
272     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
273   }
274   mat->insertmode = addv; /* in case this processor had no cache */
275 
276   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
277   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
278   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
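
/*
   The MPI_Allreduce() with MPI_BOR above is how conflicting insert modes are detected:
   if some process used INSERT_VALUES while another used ADD_VALUES, the bitwise OR of
   the modes equals (ADD_VALUES|INSERT_VALUES) and an error is raised; otherwise every
   process, including those that made no MatSetValues() calls at all, ends up holding
   the common mode.
*/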
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
288   PetscErrorCode ierr;
289   PetscMPIInt    n;
290   PetscInt       i,j,rstart,ncols,flg;
291   PetscInt       *row,*col,other_disassembled;
292   PetscScalar    *val;
293   InsertMode     addv = mat->insertmode;
294 
295   PetscFunctionBegin;
296   if (!aij->donotstash) {
297     while (1) {
298       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
299       if (!flg) break;
300 
301       for (i=0; i<n;) {
302         /* Now identify the consecutive vals belonging to the same row */
303         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
304         if (j < n) ncols = j-i;
305         else       ncols = n-i;
306         /* Now assemble all these values with a single function call */
307         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
308         i = j;
309       }
310     }
311     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
312   }
313   a->compressedrow.use     = PETSC_FALSE;
314   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
316 
317   /* determine if any processor has disassembled; if so, we must
318      also disassemble ourselves, so that we may reassemble. */
319   /*
320      if the nonzero structure of submatrix B cannot change then we know that
321      no processor disassembled, thus we can skip this step
322   */
323   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
324     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
325     if (mat->was_assembled && !other_disassembled) {
326       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
327     }
328   }
329   /* re-fetch b because aij->B may have been changed by MatSetValues() or DisAssemble() */
330   b    = (Mat_SeqAIJ *)aij->B->data;
331 
332   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
333     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
334   }
335   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
336   b->compressedrow.use = PETSC_TRUE;
337   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
339 
340   if (aij->rowvalues) {
341     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
342     aij->rowvalues = 0;
343   }
344 
345   /* used by MatAXPY() */
346   a->xtoy = 0; b->xtoy = 0;
347   a->XtoY = 0; b->XtoY = 0;
348 
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
354 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
355 {
356   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
361   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "MatZeroRows_MPIAIJ"
367 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
368 {
369   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
370   PetscErrorCode ierr;
371   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
372   PetscInt       i,*owners = l->rowners;
373   PetscInt       *nprocs,j,idx,nsends,row;
374   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
375   PetscInt       *rvalues,count,base,slen,*source;
376   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
377   MPI_Comm       comm = A->comm;
378   MPI_Request    *send_waits,*recv_waits;
379   MPI_Status     recv_status,*send_status;
380 #if defined(PETSC_DEBUG)
381   PetscTruth     found = PETSC_FALSE;
382 #endif
383 
384   PetscFunctionBegin;
385   /*  first count number of contributors to each processor */
386   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
387   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
388   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
389   j = 0;
390   for (i=0; i<N; i++) {
391     if (lastidx > (idx = rows[i])) j = 0;
392     lastidx = idx;
393     for (; j<size; j++) {
394       if (idx >= owners[j] && idx < owners[j+1]) {
395         nprocs[2*j]++;
396         nprocs[2*j+1] = 1;
397         owner[i] = j;
398 #if defined(PETSC_DEBUG)
399         found = PETSC_TRUE;
400 #endif
401         break;
402       }
403     }
404 #if defined(PETSC_DEBUG)
405     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
406     found = PETSC_FALSE;
407 #endif
408   }
409   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
410 
411   /* inform other processors of number of messages and max length*/
412   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
413 
414   /* post receives:   */
415   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
416   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
417   for (i=0; i<nrecvs; i++) {
418     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
419   }
420 
421   /* do sends:
422       1) starts[i] gives the starting index in svalues for stuff going to
423          the ith processor
424   */
425   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
426   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
427   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
428   starts[0] = 0;
429   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
430   for (i=0; i<N; i++) {
431     svalues[starts[owner[i]]++] = rows[i];
432   }
433 
434   starts[0] = 0;
435   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
436   count = 0;
437   for (i=0; i<size; i++) {
438     if (nprocs[2*i+1]) {
439       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
440     }
441   }
442   ierr = PetscFree(starts);CHKERRQ(ierr);
443 
444   base = owners[rank];
445 
446   /*  wait on receives */
447   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
448   source = lens + nrecvs;
449   count  = nrecvs; slen = 0;
450   while (count) {
451     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
452     /* unpack receives into our local space */
453     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
454     source[imdex]  = recv_status.MPI_SOURCE;
455     lens[imdex]    = n;
456     slen          += n;
457     count--;
458   }
459   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
460 
461   /* convert the received global row indices into local row indices */
462   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
463   count = 0;
464   for (i=0; i<nrecvs; i++) {
465     values = rvalues + i*nmax;
466     for (j=0; j<lens[i]; j++) {
467       lrows[count++] = values[j] - base;
468     }
469   }
470   ierr = PetscFree(rvalues);CHKERRQ(ierr);
471   ierr = PetscFree(lens);CHKERRQ(ierr);
472   ierr = PetscFree(owner);CHKERRQ(ierr);
473   ierr = PetscFree(nprocs);CHKERRQ(ierr);
474 
475   /* actually zap the local rows */
476   /*
477         Zero the required rows. If the "diagonal block" of the matrix
478      is square and the user wishes to set the diagonal we use separate
479      code so that MatSetValues() is not called for each diagonal entry, which would
480      allocate new memory each time, causing lots of mallocs and slowing things down.
481 
482        Contributed by: Matthew Knepley
483   */
484   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
485   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
486   if ((diag != 0.0) && (l->A->M == l->A->N)) {
487     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
488   } else if (diag != 0.0) {
489     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
490     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
491       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
492 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
493     }
494     for (i = 0; i < slen; i++) {
495       row  = lrows[i] + rstart;
496       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
497     }
498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
500   } else {
501     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
502   }
503   ierr = PetscFree(lrows);CHKERRQ(ierr);
504 
505   /* wait on sends */
506   if (nsends) {
507     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
508     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
509     ierr = PetscFree(send_status);CHKERRQ(ierr);
510   }
511   ierr = PetscFree(send_waits);CHKERRQ(ierr);
512   ierr = PetscFree(svalues);CHKERRQ(ierr);
513 
514   PetscFunctionReturn(0);
515 }
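
/*
   Summary of the communication in MatZeroRows_MPIAIJ(): each process determines the
   owner of every requested global row from l->rowners, agrees on message counts and
   lengths with PetscMaxSum(), and sends each row to its owning process (possibly
   itself).  Once the receives complete, the owner converts the global rows to local
   indices and zeroes them, with separate handling of the diagonal when a nonzero diag
   was requested.
*/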
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatMult_MPIAIJ"
519 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
520 {
521   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
522   PetscErrorCode ierr;
523   PetscInt       nt;
524 
525   PetscFunctionBegin;
526   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
527   if (nt != A->n) {
528     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
529   }
530   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
531   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
532   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
533   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
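
/*
   MatMult_MPIAIJ() computes yy = A*xx + B*lvec, where A is the local diagonal block, B
   the off-diagonal block, and lvec holds the ghost values of xx gathered by the
   scatter.  The scatter is started before the diagonal-block multiply so that the
   communication can overlap with local computation; the off-diagonal contribution is
   added only after VecScatterEnd() has completed.
*/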
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatMultAdd_MPIAIJ"
539 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
540 {
541   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
542   PetscErrorCode ierr;
543 
544   PetscFunctionBegin;
545   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
546   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
547   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
548   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
554 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
555 {
556   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
557   PetscErrorCode ierr;
558   PetscTruth     merged;
559 
560   PetscFunctionBegin;
561   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
562   /* do nondiagonal part */
563   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
564   if (!merged) {
565     /* send it on its way */
566     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
567     /* do local part */
568     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
569     /* receive remote parts: note this assumes the values are not actually */
570     /* added into yy until the VecScatterEnd() on the next line */
571     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
572   } else {
573     /* do local part */
574     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
575     /* send it on its way */
576     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
577     /* values were actually received in the Begin(), but we still need to call this no-op End() */
578     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 EXTERN_C_BEGIN
584 #undef __FUNCT__
585 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
586 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
587 {
588   MPI_Comm       comm;
589   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
590   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
591   IS             Me,Notme;
592   PetscErrorCode ierr;
593   PetscInt       M,N,first,last,*notme,i;
594   PetscMPIInt    size;
595 
596   PetscFunctionBegin;
597 
598   /* Easy test: the diagonal block of A must be the transpose of the diagonal block of B */
599   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
600   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
601   if (!*f) PetscFunctionReturn(0);
602   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
604   if (size == 1) PetscFunctionReturn(0);
605 
606   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
607   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
608   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
609   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
610   for (i=0; i<first; i++) notme[i] = i;
611   for (i=last; i<M; i++) notme[i-last+first] = i;
612   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
613   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
614   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
615   Aoff = Aoffs[0];
616   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
617   Boff = Boffs[0];
618   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
619   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
620   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
621   ierr = ISDestroy(Me);CHKERRQ(ierr);
622   ierr = ISDestroy(Notme);CHKERRQ(ierr);
623 
624   PetscFunctionReturn(0);
625 }
626 EXTERN_C_END
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
630 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631 {
632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
633   PetscErrorCode ierr;
634 
635   PetscFunctionBegin;
636   /* do nondiagonal part */
637   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
638   /* send it on its way */
639   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
640   /* do local part */
641   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
642   /* receive remote parts */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   PetscErrorCode ierr;
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
670 {
671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
672   PetscErrorCode ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
676   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
685   PetscErrorCode ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705 
706   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
707   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
708   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
709   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
710   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
711   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
712   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_Binary"
718 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
722   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
723   PetscErrorCode    ierr;
724   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
725   int               fd;
726   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
727   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
728   PetscScalar       *column_values;
729 
730   PetscFunctionBegin;
731   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
732   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
733   nz   = A->nz + B->nz;
734   if (!rank) {
735     header[0] = MAT_FILE_COOKIE;
736     header[1] = mat->M;
737     header[2] = mat->N;
738     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
739     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
740     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
741     /* get largest number of rows any processor has */
742     rlen = mat->m;
743     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
744     for (i=1; i<size; i++) {
745       rlen = PetscMax(rlen,range[i+1] - range[i]);
746     }
747   } else {
748     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
749     rlen = mat->m;
750   }
751 
752   /* load up the local row counts */
753   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
754   for (i=0; i<mat->m; i++) {
755     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
756   }
757 
758   /* store the row lengths to the file */
759   if (!rank) {
760     MPI_Status status;
761     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
762     for (i=1; i<size; i++) {
763       rlen = range[i+1] - range[i];
764       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
765       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
766     }
767   } else {
768     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
769   }
770   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
771 
772   /* load up the local column indices */
773   nzmax = nz; /* the 0th processor needs as much space as the processor with the most nonzeros */
774   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
775   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
776   cnt  = 0;
777   for (i=0; i<mat->m; i++) {
778     for (j=B->i[i]; j<B->i[i+1]; j++) {
779       if ( (col = garray[B->j[j]]) > cstart) break;
780       column_indices[cnt++] = col;
781     }
782     for (k=A->i[i]; k<A->i[i+1]; k++) {
783       column_indices[cnt++] = A->j[k] + cstart;
784     }
785     for (; j<B->i[i+1]; j++) {
786       column_indices[cnt++] = garray[B->j[j]];
787     }
788   }
789   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
790 
791   /* store the column indices to the file */
792   if (!rank) {
793     MPI_Status status;
794     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
797       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
798       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
799       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
800     }
801   } else {
802     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
803     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
804   }
805   ierr = PetscFree(column_indices);CHKERRQ(ierr);
806 
807   /* load up the local column values */
808   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
809   cnt  = 0;
810   for (i=0; i<mat->m; i++) {
811     for (j=B->i[i]; j<B->i[i+1]; j++) {
812       if ( garray[B->j[j]] > cstart) break;
813       column_values[cnt++] = B->a[j];
814     }
815     for (k=A->i[i]; k<A->i[i+1]; k++) {
816       column_values[cnt++] = A->a[k];
817     }
818     for (; j<B->i[i+1]; j++) {
819       column_values[cnt++] = B->a[j];
820     }
821   }
822   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
823 
824   /* store the column values to the file */
825   if (!rank) {
826     MPI_Status status;
827     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
828     for (i=1; i<size; i++) {
829       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
830       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
831       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
833     }
834   } else {
835     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
836     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
837   }
838   ierr = PetscFree(column_values);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
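
/*
   Layout produced by MatView_MPIAIJ_Binary(): a four-entry header (MAT_FILE_COOKIE, M,
   N, global number of nonzeros), then the row lengths of every row, then the global
   column indices row by row (off-diagonal columns below cstart, the diagonal block,
   then the remaining off-diagonal columns), then the values in the same order.
   Process 0 collects each piece from the other processes and writes it with
   PetscBinaryWrite().
*/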
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
844 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
845 {
846   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
847   PetscErrorCode    ierr;
848   PetscMPIInt       rank = aij->rank,size = aij->size;
849   PetscTruth        isdraw,iascii,isbinary;
850   PetscViewer       sviewer;
851   PetscViewerFormat format;
852 
853   PetscFunctionBegin;
854   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
855   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
856   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
857   if (iascii) {
858     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
859     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
860       MatInfo    info;
861       PetscTruth inodes;
862 
863       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
864       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
865       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
866       if (!inodes) {
867         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
868 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
869       } else {
870         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
871 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
872       }
873       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
875       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
876       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
877       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
878       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     } else if (format == PETSC_VIEWER_ASCII_INFO) {
881       PetscInt   inodecount,inodelimit,*inodes;
882       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
883       if (inodes) {
884         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
885       } else {
886         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
887       }
888       PetscFunctionReturn(0);
889     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
890       PetscFunctionReturn(0);
891     }
892   } else if (isbinary) {
893     if (size == 1) {
894       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
895       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
896     } else {
897       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
898     }
899     PetscFunctionReturn(0);
900   } else if (isdraw) {
901     PetscDraw  draw;
902     PetscTruth isnull;
903     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
904     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
905   }
906 
907   if (size == 1) {
908     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
909     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
910   } else {
911     /* assemble the entire matrix onto first processor. */
912     Mat         A;
913     Mat_SeqAIJ  *Aloc;
914     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
915     PetscScalar *a;
916 
917     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
918     if (!rank) {
919       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
920     } else {
921       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
922     }
923     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
924     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
925     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
926     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
927 
928     /* copy over the A part */
929     Aloc = (Mat_SeqAIJ*)aij->A->data;
930     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
931     row = aij->rstart;
932     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
933     for (i=0; i<m; i++) {
934       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
935       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
936     }
937     aj = Aloc->j;
938     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
939 
940     /* copy over the B part */
941     Aloc = (Mat_SeqAIJ*)aij->B->data;
942     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943     row  = aij->rstart;
944     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
945     ct   = cols;
946     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
947     for (i=0; i<m; i++) {
948       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
949       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
950     }
951     ierr = PetscFree(ct);CHKERRQ(ierr);
952     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
954     /*
955        Everyone has to participate in this call to draw the matrix since the graphics waits are
956        synchronized across all processors that share the PetscDraw object
957     */
958     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
959     if (!rank) {
960       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
961       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
962     }
963     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
964     ierr = MatDestroy(A);CHKERRQ(ierr);
965   }
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatView_MPIAIJ"
971 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
972 {
973   PetscErrorCode ierr;
974   PetscTruth     iascii,isdraw,issocket,isbinary;
975 
976   PetscFunctionBegin;
977   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
978   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
979   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
980   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
981   if (iascii || isdraw || isbinary || issocket) {
982     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
983   } else {
984     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
985   }
986   PetscFunctionReturn(0);
987 }
988 
989 
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatRelax_MPIAIJ"
993 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
994 {
995   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
996   PetscErrorCode ierr;
997   Vec            bb1;
998   PetscScalar    mone=-1.0;
999 
1000   PetscFunctionBegin;
1001   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1002 
1003   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1004 
1005   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1006     if (flag & SOR_ZERO_INITIAL_GUESS) {
1007       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1008       its--;
1009     }
1010 
1011     while (its--) {
1012       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1013       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1014 
1015       /* update rhs: bb1 = bb - B*x */
1016       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1017       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1018 
1019       /* local sweep */
1020       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1021       CHKERRQ(ierr);
1022     }
1023   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1024     if (flag & SOR_ZERO_INITIAL_GUESS) {
1025       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1026       its--;
1027     }
1028     while (its--) {
1029       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1030       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1031 
1032       /* update rhs: bb1 = bb - B*x */
1033       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1034       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1035 
1036       /* local sweep */
1037       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1038       CHKERRQ(ierr);
1039     }
1040   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1041     if (flag & SOR_ZERO_INITIAL_GUESS) {
1042       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1043       its--;
1044     }
1045     while (its--) {
1046       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1047       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1048 
1049       /* update rhs: bb1 = bb - B*x */
1050       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1051       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1052 
1053       /* local sweep */
1054       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1055       CHKERRQ(ierr);
1056     }
1057   } else {
1058     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1059   }
1060 
1061   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
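
/*
   The SOR_LOCAL_* variants above perform a block-Jacobi style relaxation: the ghost
   values of xx are gathered into mat->lvec, the right-hand side is updated as
   bb1 = bb - B*lvec (implemented by negating lvec and using the multadd of B), and the
   sequential relaxation of the diagonal block A is then applied to bb1.  A true
   parallel SOR, with coupling across processes inside one sweep, is not supported.
*/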
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatPermute_MPIAIJ"
1067 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1068 {
1069   MPI_Comm       comm,pcomm;
1070   PetscInt       first,local_size,nrows,*rows;
1071   int            ntids;
1072   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1073   PetscErrorCode ierr;
1074 
1075   PetscFunctionBegin;
1076   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1077   /* make a collective version of 'rowp' */
1078   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1079   if (pcomm==comm) {
1080     crowp = rowp;
1081   } else {
1082     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1083     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1084     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1085     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1086   }
1087   /* collect the global row permutation and invert it */
1088   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1089   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1090   if (pcomm!=comm) {
1091     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1092   }
1093   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1094   /* get the local target indices */
1095   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1096   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1097   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1098   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1099   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1100   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1101   /* the column permutation is so much easier;
1102      make a local version of 'colp' and invert it */
1103   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1104   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1105   if (ntids==1) {
1106     lcolp = colp;
1107   } else {
1108     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1109     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1110     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1111   }
1112   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1113   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1114   if (ntids>1) {
1115     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1116     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1117   }
1118   /* now we just get the submatrix */
1119   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1120   /* clean up */
1121   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1122   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1128 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1129 {
1130   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1131   Mat            A = mat->A,B = mat->B;
1132   PetscErrorCode ierr;
1133   PetscReal      isend[5],irecv[5];
1134 
1135   PetscFunctionBegin;
1136   info->block_size     = 1.0;
1137   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1138   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1139   isend[3] = info->memory;  isend[4] = info->mallocs;
1140   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1141   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1142   isend[3] += info->memory;  isend[4] += info->mallocs;
1143   if (flag == MAT_LOCAL) {
1144     info->nz_used      = isend[0];
1145     info->nz_allocated = isend[1];
1146     info->nz_unneeded  = isend[2];
1147     info->memory       = isend[3];
1148     info->mallocs      = isend[4];
1149   } else if (flag == MAT_GLOBAL_MAX) {
1150     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1151     info->nz_used      = irecv[0];
1152     info->nz_allocated = irecv[1];
1153     info->nz_unneeded  = irecv[2];
1154     info->memory       = irecv[3];
1155     info->mallocs      = irecv[4];
1156   } else if (flag == MAT_GLOBAL_SUM) {
1157     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1158     info->nz_used      = irecv[0];
1159     info->nz_allocated = irecv[1];
1160     info->nz_unneeded  = irecv[2];
1161     info->memory       = irecv[3];
1162     info->mallocs      = irecv[4];
1163   }
1164   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1165   info->fill_ratio_needed = 0;
1166   info->factor_mallocs    = 0;
1167   info->rows_global       = (double)matin->M;
1168   info->columns_global    = (double)matin->N;
1169   info->rows_local        = (double)matin->m;
1170   info->columns_local     = (double)matin->N;
1171 
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatSetOption_MPIAIJ"
1177 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1178 {
1179   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   switch (op) {
1184   case MAT_NO_NEW_NONZERO_LOCATIONS:
1185   case MAT_YES_NEW_NONZERO_LOCATIONS:
1186   case MAT_COLUMNS_UNSORTED:
1187   case MAT_COLUMNS_SORTED:
1188   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1189   case MAT_KEEP_ZEROED_ROWS:
1190   case MAT_NEW_NONZERO_LOCATION_ERR:
1191   case MAT_USE_INODES:
1192   case MAT_DO_NOT_USE_INODES:
1193   case MAT_IGNORE_ZERO_ENTRIES:
1194     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1195     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1196     break;
1197   case MAT_ROW_ORIENTED:
1198     a->roworiented = PETSC_TRUE;
1199     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1200     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1201     break;
1202   case MAT_ROWS_SORTED:
1203   case MAT_ROWS_UNSORTED:
1204   case MAT_YES_NEW_DIAGONALS:
1205     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1206     break;
1207   case MAT_COLUMN_ORIENTED:
1208     a->roworiented = PETSC_FALSE;
1209     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1211     break;
1212   case MAT_IGNORE_OFF_PROC_ENTRIES:
1213     a->donotstash = PETSC_TRUE;
1214     break;
1215   case MAT_NO_NEW_DIAGONALS:
1216     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1217   case MAT_SYMMETRIC:
1218   case MAT_STRUCTURALLY_SYMMETRIC:
1219   case MAT_HERMITIAN:
1220   case MAT_SYMMETRY_ETERNAL:
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     break;
1223   case MAT_NOT_SYMMETRIC:
1224   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1225   case MAT_NOT_HERMITIAN:
1226   case MAT_NOT_SYMMETRY_ETERNAL:
1227     break;
1228   default:
1229     SETERRQ(PETSC_ERR_SUP,"unknown option");
1230   }
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatGetRow_MPIAIJ"
1236 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1237 {
1238   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1239   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1240   PetscErrorCode ierr;
1241   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1242   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1243   PetscInt       *cmap,*idx_p;
1244 
1245   PetscFunctionBegin;
1246   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1247   mat->getrowactive = PETSC_TRUE;
1248 
1249   if (!mat->rowvalues && (idx || v)) {
1250     /*
1251         allocate enough space to hold information from the longest row.
1252     */
1253     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1254     PetscInt     max = 1,tmp;
1255     for (i=0; i<matin->m; i++) {
1256       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1257       if (max < tmp) { max = tmp; }
1258     }
1259     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1260     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1261   }
1262 
1263   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1264   lrow = row - rstart;
1265 
1266   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1267   if (!v)   {pvA = 0; pvB = 0;}
1268   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1269   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1270   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1271   nztot = nzA + nzB;
1272 
1273   cmap  = mat->garray;
1274   if (v  || idx) {
1275     if (nztot) {
1276       /* Sort by increasing column numbers, assuming A and B already sorted */
1277       PetscInt imark = -1;
1278       if (v) {
1279         *v = v_p = mat->rowvalues;
1280         for (i=0; i<nzB; i++) {
1281           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1282           else break;
1283         }
1284         imark = i;
1285         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1286         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1287       }
1288       if (idx) {
1289         *idx = idx_p = mat->rowindices;
1290         if (imark > -1) {
1291           for (i=0; i<imark; i++) {
1292             idx_p[i] = cmap[cworkB[i]];
1293           }
1294         } else {
1295           for (i=0; i<nzB; i++) {
1296             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1297             else break;
1298           }
1299           imark = i;
1300         }
1301         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1302         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1303       }
1304     } else {
1305       if (idx) *idx = 0;
1306       if (v)   *v   = 0;
1307     }
1308   }
1309   *nz = nztot;
1310   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1311   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
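
/*
   MatGetRow_MPIAIJ() returns the requested row with global column indices in
   increasing order by merging three already-sorted pieces: off-diagonal entries whose
   global column is below cstart, the diagonal-block entries (shifted by cstart), and
   the remaining off-diagonal entries (mapped through garray).  The work arrays
   rowvalues/rowindices are allocated once, sized for the longest local row, and reused
   by later calls until they are freed at the next assembly.
*/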
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1317 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1318 {
1319   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1320 
1321   PetscFunctionBegin;
1322   if (!aij->getrowactive) {
1323     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1324   }
1325   aij->getrowactive = PETSC_FALSE;
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatNorm_MPIAIJ"
1331 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1332 {
1333   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1334   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1335   PetscErrorCode ierr;
1336   PetscInt       i,j,cstart = aij->cstart;
1337   PetscReal      sum = 0.0;
1338   PetscScalar    *v;
1339 
1340   PetscFunctionBegin;
1341   if (aij->size == 1) {
1342     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1343   } else {
1344     if (type == NORM_FROBENIUS) {
1345       v = amat->a;
1346       for (i=0; i<amat->nz; i++) {
1347 #if defined(PETSC_USE_COMPLEX)
1348         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1349 #else
1350         sum += (*v)*(*v); v++;
1351 #endif
1352       }
1353       v = bmat->a;
1354       for (i=0; i<bmat->nz; i++) {
1355 #if defined(PETSC_USE_COMPLEX)
1356         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1357 #else
1358         sum += (*v)*(*v); v++;
1359 #endif
1360       }
1361       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1362       *norm = sqrt(*norm);
1363     } else if (type == NORM_1) { /* max column norm */
1364       PetscReal *tmp,*tmp2;
1365       PetscInt    *jj,*garray = aij->garray;
1366       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1367       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1368       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1369       *norm = 0.0;
1370       v = amat->a; jj = amat->j;
1371       for (j=0; j<amat->nz; j++) {
1372         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1373       }
1374       v = bmat->a; jj = bmat->j;
1375       for (j=0; j<bmat->nz; j++) {
1376         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1377       }
1378       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1379       for (j=0; j<mat->N; j++) {
1380         if (tmp2[j] > *norm) *norm = tmp2[j];
1381       }
1382       ierr = PetscFree(tmp);CHKERRQ(ierr);
1383       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1384     } else if (type == NORM_INFINITY) { /* max row norm */
1385       PetscReal ntemp = 0.0;
1386       for (j=0; j<aij->A->m; j++) {
1387         v = amat->a + amat->i[j];
1388         sum = 0.0;
1389         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1390           sum += PetscAbsScalar(*v); v++;
1391         }
1392         v = bmat->a + bmat->i[j];
1393         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1394           sum += PetscAbsScalar(*v); v++;
1395         }
1396         if (sum > ntemp) ntemp = sum;
1397       }
1398       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1399     } else {
1400       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1401     }
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatTranspose_MPIAIJ"
1408 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1409 {
1410   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1411   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1412   PetscErrorCode ierr;
1413   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1414   Mat            B;
1415   PetscScalar    *array;
1416 
1417   PetscFunctionBegin;
1418   if (!matout && M != N) {
1419     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1420   }
1421 
1422   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1423   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1424   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1425   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1426 
1427   /* copy over the A part */
1428   Aloc = (Mat_SeqAIJ*)a->A->data;
1429   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1430   row = a->rstart;
1431   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1432   for (i=0; i<m; i++) {
1433     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1434     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1435   }
1436   aj = Aloc->j;
1437   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1438 
1439   /* copy over the B part */
1440   Aloc = (Mat_SeqAIJ*)a->B->data;
1441   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1442   row  = a->rstart;
1443   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1444   ct   = cols;
1445   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1446   for (i=0; i<m; i++) {
1447     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1448     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1449   }
1450   ierr = PetscFree(ct);CHKERRQ(ierr);
1451   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1452   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1453   if (matout) {
1454     *matout = B;
1455   } else {
1456     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1463 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1464 {
1465   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1466   Mat            a = aij->A,b = aij->B;
1467   PetscErrorCode ierr;
1468   PetscInt       s1,s2,s3;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1472   if (rr) {
1473     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1474     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1475     /* Overlap communication with computation. */
1476     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1477   }
1478   if (ll) {
1479     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1480     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1481     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1482   }
1483   /* scale  the diagonal block */
1484   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1485 
1486   if (rr) {
1487     /* Do a scatter end and then right scale the off-diagonal block */
1488     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1489     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1490   }
1491 
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1498 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1499 {
1500   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1501   PetscErrorCode ierr;
1502 
1503   PetscFunctionBegin;
1504   if (!a->rank) {
1505     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1506   }
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 #undef __FUNCT__
1511 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1512 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1513 {
1514   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1515   PetscErrorCode ierr;
1516 
1517   PetscFunctionBegin;
1518   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1519   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1524 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1525 {
1526   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatEqual_MPIAIJ"
1536 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1537 {
1538   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1539   Mat            a,b,c,d;
1540   PetscTruth     flg;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   a = matA->A; b = matA->B;
1545   c = matB->A; d = matB->B;
1546 
1547   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1548   if (flg) {
1549     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1550   }
1551   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatCopy_MPIAIJ"
1557 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1558 {
1559   PetscErrorCode ierr;
1560   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1561   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1562 
1563   PetscFunctionBegin;
1564   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1565   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1566     /* because of the column compression in the off-processor part of the matrix a->B,
1567        the number of columns in a->B and b->B may be different; hence we cannot call
1568        MatCopy() directly on the two parts. If need be, we could provide a copy more
1569        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1570        then copying the submatrices */
1571     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1572   } else {
1573     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1574     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1575   }
1576   PetscFunctionReturn(0);
1577 }
1578 
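/*
   A small sketch (compiled out; the names SampleFastCopy, X and Y are hypothetical) of how the
   fast branch above is normally reached: Y is obtained from X with MatDuplicate(), so the two
   matrices share the same layout, nonzero pattern and copy implementation, and MatCopy() can
   then be called with SAME_NONZERO_PATTERN.  Any other combination falls back to
   MatCopy_Basic() as noted in the comment above.
*/
#if 0
static PetscErrorCode SampleFastCopy(Mat X)
{
  PetscErrorCode ierr;
  Mat            Y;

  PetscFunctionBegin;
  ierr = MatDuplicate(X,MAT_COPY_VALUES,&Y);CHKERRQ(ierr);  /* Y gets X's layout and nonzero pattern */
  /* ... X is modified numerically, its nonzero pattern unchanged ... */
  ierr = MatCopy(X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);   /* copies the diagonal and off-diagonal blocks directly */
  ierr = MatDestroy(Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
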
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1581 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1582 {
1583   PetscErrorCode ierr;
1584 
1585   PetscFunctionBegin;
1586   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #include "petscblaslapack.h"
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatAXPY_MPIAIJ"
1593 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1594 {
1595   PetscErrorCode ierr;
1596   PetscInt       i;
1597   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1598   PetscBLASInt   bnz,one=1;
1599   Mat_SeqAIJ     *x,*y;
1600 
1601   PetscFunctionBegin;
1602   if (str == SAME_NONZERO_PATTERN) {
1603     PetscScalar alpha = a;
1604     x = (Mat_SeqAIJ *)xx->A->data;
1605     y = (Mat_SeqAIJ *)yy->A->data;
1606     bnz = (PetscBLASInt)x->nz;
1607     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1608     x = (Mat_SeqAIJ *)xx->B->data;
1609     y = (Mat_SeqAIJ *)yy->B->data;
1610     bnz = (PetscBLASInt)x->nz;
1611     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1612   } else if (str == SUBSET_NONZERO_PATTERN) {
1613     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1614 
1615     x = (Mat_SeqAIJ *)xx->B->data;
1616     y = (Mat_SeqAIJ *)yy->B->data;
1617     if (y->xtoy && y->XtoY != xx->B) {
1618       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1619       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1620     }
1621     if (!y->xtoy) { /* get xtoy */
1622       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1623       y->XtoY = xx->B;
1624     }
1625     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1626   } else {
1627     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatConjugate_MPIAIJ"
1636 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1637 {
1638 #if defined(PETSC_USE_COMPLEX)
1639   PetscErrorCode ierr;
1640   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1641 
1642   PetscFunctionBegin;
1643   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1644   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1645 #else
1646   PetscFunctionBegin;
1647 #endif
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 /* -------------------------------------------------------------------*/
1652 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1653        MatGetRow_MPIAIJ,
1654        MatRestoreRow_MPIAIJ,
1655        MatMult_MPIAIJ,
1656 /* 4*/ MatMultAdd_MPIAIJ,
1657        MatMultTranspose_MPIAIJ,
1658        MatMultTransposeAdd_MPIAIJ,
1659        0,
1660        0,
1661        0,
1662 /*10*/ 0,
1663        0,
1664        0,
1665        MatRelax_MPIAIJ,
1666        MatTranspose_MPIAIJ,
1667 /*15*/ MatGetInfo_MPIAIJ,
1668        MatEqual_MPIAIJ,
1669        MatGetDiagonal_MPIAIJ,
1670        MatDiagonalScale_MPIAIJ,
1671        MatNorm_MPIAIJ,
1672 /*20*/ MatAssemblyBegin_MPIAIJ,
1673        MatAssemblyEnd_MPIAIJ,
1674        0,
1675        MatSetOption_MPIAIJ,
1676        MatZeroEntries_MPIAIJ,
1677 /*25*/ MatZeroRows_MPIAIJ,
1678        0,
1679        0,
1680        0,
1681        0,
1682 /*30*/ MatSetUpPreallocation_MPIAIJ,
1683        0,
1684        0,
1685        0,
1686        0,
1687 /*35*/ MatDuplicate_MPIAIJ,
1688        0,
1689        0,
1690        0,
1691        0,
1692 /*40*/ MatAXPY_MPIAIJ,
1693        MatGetSubMatrices_MPIAIJ,
1694        MatIncreaseOverlap_MPIAIJ,
1695        MatGetValues_MPIAIJ,
1696        MatCopy_MPIAIJ,
1697 /*45*/ MatPrintHelp_MPIAIJ,
1698        MatScale_MPIAIJ,
1699        0,
1700        0,
1701        0,
1702 /*50*/ MatSetBlockSize_MPIAIJ,
1703        0,
1704        0,
1705        0,
1706        0,
1707 /*55*/ MatFDColoringCreate_MPIAIJ,
1708        0,
1709        MatSetUnfactored_MPIAIJ,
1710        MatPermute_MPIAIJ,
1711        0,
1712 /*60*/ MatGetSubMatrix_MPIAIJ,
1713        MatDestroy_MPIAIJ,
1714        MatView_MPIAIJ,
1715        MatGetPetscMaps_Petsc,
1716        0,
1717 /*65*/ 0,
1718        0,
1719        0,
1720        0,
1721        0,
1722 /*70*/ 0,
1723        0,
1724        MatSetColoring_MPIAIJ,
1725 #if defined(PETSC_HAVE_ADIC)
1726        MatSetValuesAdic_MPIAIJ,
1727 #else
1728        0,
1729 #endif
1730        MatSetValuesAdifor_MPIAIJ,
1731 /*75*/ 0,
1732        0,
1733        0,
1734        0,
1735        0,
1736 /*80*/ 0,
1737        0,
1738        0,
1739        0,
1740 /*84*/ MatLoad_MPIAIJ,
1741        0,
1742        0,
1743        0,
1744        0,
1745        0,
1746 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1747        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1748        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1749        MatPtAP_Basic,
1750        MatPtAPSymbolic_MPIAIJ,
1751 /*95*/ MatPtAPNumeric_MPIAIJ,
1752        0,
1753        0,
1754        0,
1755        0,
1756 /*100*/0,
1757        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1758        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1759        MatConjugate_MPIAIJ
1760 };
1761 
1762 /* ----------------------------------------------------------------------------------------*/
1763 
1764 EXTERN_C_BEGIN
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1767 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1768 {
1769   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1770   PetscErrorCode ierr;
1771 
1772   PetscFunctionBegin;
1773   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1774   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 EXTERN_C_END
1778 
1779 EXTERN_C_BEGIN
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1782 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1783 {
1784   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1785   PetscErrorCode ierr;
1786 
1787   PetscFunctionBegin;
1788   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1789   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1790   PetscFunctionReturn(0);
1791 }
1792 EXTERN_C_END
1793 
1794 #include "petscpc.h"
1795 EXTERN_C_BEGIN
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1798 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1799 {
1800   Mat_MPIAIJ     *b;
1801   PetscErrorCode ierr;
1802   PetscInt       i;
1803 
1804   PetscFunctionBegin;
1805   B->preallocated = PETSC_TRUE;
1806   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1807   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1808   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1809   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1810   if (d_nnz) {
1811     for (i=0; i<B->m; i++) {
1812       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1813     }
1814   }
1815   if (o_nnz) {
1816     for (i=0; i<B->m; i++) {
1817       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1818     }
1819   }
1820   b = (Mat_MPIAIJ*)B->data;
1821   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1822   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1823 
1824   PetscFunctionReturn(0);
1825 }
1826 EXTERN_C_END
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1830 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1831 {
1832   Mat            mat;
1833   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1834   PetscErrorCode ierr;
1835 
1836   PetscFunctionBegin;
1837   *newmat       = 0;
1838   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1839   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1840   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1841   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1842   a    = (Mat_MPIAIJ*)mat->data;
1843 
1844   mat->factor       = matin->factor;
1845   mat->bs           = matin->bs;
1846   mat->assembled    = PETSC_TRUE;
1847   mat->insertmode   = NOT_SET_VALUES;
1848   mat->preallocated = PETSC_TRUE;
1849 
1850   a->rstart       = oldmat->rstart;
1851   a->rend         = oldmat->rend;
1852   a->cstart       = oldmat->cstart;
1853   a->cend         = oldmat->cend;
1854   a->size         = oldmat->size;
1855   a->rank         = oldmat->rank;
1856   a->donotstash   = oldmat->donotstash;
1857   a->roworiented  = oldmat->roworiented;
1858   a->rowindices   = 0;
1859   a->rowvalues    = 0;
1860   a->getrowactive = PETSC_FALSE;
1861 
1862   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1863   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1864   if (oldmat->colmap) {
1865 #if defined (PETSC_USE_CTABLE)
1866     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1867 #else
1868     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1869     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1870     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1871 #endif
1872   } else a->colmap = 0;
1873   if (oldmat->garray) {
1874     PetscInt len;
1875     len  = oldmat->B->n;
1876     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1877     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1878     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1879   } else a->garray = 0;
1880 
1881   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1882   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1883   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1884   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1885   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1886   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1887   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1888   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1889   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1890   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1891   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1892   *newmat = mat;
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #include "petscsys.h"
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatLoad_MPIAIJ"
1900 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1901 {
1902   Mat            A;
1903   PetscScalar    *vals,*svals;
1904   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1905   MPI_Status     status;
1906   PetscErrorCode ierr;
1907   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1908   PetscInt       i,nz,j,rstart,rend,mmax;
1909   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1910   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1911   PetscInt       cend,cstart,n,*rowners;
1912   int            fd;
1913 
1914   PetscFunctionBegin;
1915   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1916   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1917   if (!rank) {
1918     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1919     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1920     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1921   }
1922 
1923   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1924   M = header[1]; N = header[2];
1925   /* determine ownership of all rows */
1926   m    = M/size + ((M % size) > rank);
1927   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1928   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1929 
1930   /* First process needs enough room for process with most rows */
1931   if (!rank) {
1932     mmax       = rowners[1];
1933     for (i=2; i<size; i++) {
1934       mmax = PetscMax(mmax,rowners[i]);
1935     }
1936   } else mmax = m;
1937 
1938   rowners[0] = 0;
1939   for (i=2; i<=size; i++) {
1940     mmax       = PetscMax(mmax,rowners[i]);
1941     rowners[i] += rowners[i-1];
1942   }
1943   rstart = rowners[rank];
1944   rend   = rowners[rank+1];
1945 
1946   /* distribute row lengths to all processors */
1947   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1948   if (!rank) {
1949     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1950     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1951     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1952     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1953     for (j=0; j<m; j++) {
1954       procsnz[0] += ourlens[j];
1955     }
1956     for (i=1; i<size; i++) {
1957       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1958       /* calculate the number of nonzeros on each processor */
1959       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1960         procsnz[i] += rowlengths[j];
1961       }
1962       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1963     }
1964     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1965   } else {
1966     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1967   }
1968 
1969   if (!rank) {
1970     /* determine max buffer needed and allocate it */
1971     maxnz = 0;
1972     for (i=0; i<size; i++) {
1973       maxnz = PetscMax(maxnz,procsnz[i]);
1974     }
1975     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1976 
1977     /* read in my part of the matrix column indices  */
1978     nz   = procsnz[0];
1979     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1980     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1981 
1982     /* read in everyone else's and ship it off */
1983     for (i=1; i<size; i++) {
1984       nz   = procsnz[i];
1985       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1986       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1987     }
1988     ierr = PetscFree(cols);CHKERRQ(ierr);
1989   } else {
1990     /* determine buffer space needed for message */
1991     nz = 0;
1992     for (i=0; i<m; i++) {
1993       nz += ourlens[i];
1994     }
1995     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1996 
1997     /* receive message of column indices*/
1998     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1999     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2000     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2001   }
2002 
2003   /* determine column ownership if matrix is not square */
2004   if (N != M) {
2005     n      = N/size + ((N % size) > rank);
2006     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2007     cstart = cend - n;
2008   } else {
2009     cstart = rstart;
2010     cend   = rend;
2011     n      = cend - cstart;
2012   }
2013 
2014   /* loop over local rows, determining number of off diagonal entries */
2015   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2016   jj = 0;
2017   for (i=0; i<m; i++) {
2018     for (j=0; j<ourlens[i]; j++) {
2019       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2020       jj++;
2021     }
2022   }
2023 
2024   /* create our matrix */
2025   for (i=0; i<m; i++) {
2026     ourlens[i] -= offlens[i];
2027   }
2028   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2029   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2030   ierr = MatSetType(A,type);CHKERRQ(ierr);
2031   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2032 
2033   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2034   for (i=0; i<m; i++) {
2035     ourlens[i] += offlens[i];
2036   }
2037 
2038   if (!rank) {
2039     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2040 
2041     /* read in my part of the matrix numerical values  */
2042     nz   = procsnz[0];
2043     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2044 
2045     /* insert into matrix */
2046     jj      = rstart;
2047     smycols = mycols;
2048     svals   = vals;
2049     for (i=0; i<m; i++) {
2050       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2051       smycols += ourlens[i];
2052       svals   += ourlens[i];
2053       jj++;
2054     }
2055 
2056     /* read in other processors and ship out */
2057     for (i=1; i<size; i++) {
2058       nz   = procsnz[i];
2059       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2060       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2061     }
2062     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2063   } else {
2064     /* receive numeric values */
2065     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2066 
2067     /* receive message of values*/
2068     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2069     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2070     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2071 
2072     /* insert into matrix */
2073     jj      = rstart;
2074     smycols = mycols;
2075     svals   = vals;
2076     for (i=0; i<m; i++) {
2077       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2078       smycols += ourlens[i];
2079       svals   += ourlens[i];
2080       jj++;
2081     }
2082   }
2083   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2084   ierr = PetscFree(vals);CHKERRQ(ierr);
2085   ierr = PetscFree(mycols);CHKERRQ(ierr);
2086   ierr = PetscFree(rowners);CHKERRQ(ierr);
2087 
2088   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2090   *newmat = A;
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 #undef __FUNCT__
2095 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2096 /*
2097     Not great since it makes two copies of the submatrix: first a SeqAIJ
2098   on each process, and then the end result obtained by concatenating the local matrices.
2099   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2100 */
2101 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2102 {
2103   PetscErrorCode ierr;
2104   PetscMPIInt    rank,size;
2105   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2106   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2107   Mat            *local,M,Mreuse;
2108   PetscScalar    *vwork,*aa;
2109   MPI_Comm       comm = mat->comm;
2110   Mat_SeqAIJ     *aij;
2111 
2112 
2113   PetscFunctionBegin;
2114   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2115   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2116 
2117   if (call ==  MAT_REUSE_MATRIX) {
2118     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2119     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2120     local = &Mreuse;
2121     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2122   } else {
2123     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2124     Mreuse = *local;
2125     ierr   = PetscFree(local);CHKERRQ(ierr);
2126   }
2127 
2128   /*
2129       m - number of local rows
2130       n - number of columns (same on all processors)
2131       rstart - first row in new global matrix generated
2132   */
2133   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2134   if (call == MAT_INITIAL_MATRIX) {
2135     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2136     ii  = aij->i;
2137     jj  = aij->j;
2138 
2139     /*
2140         Determine the number of non-zeros in the diagonal and off-diagonal
2141         portions of the matrix in order to do correct preallocation
2142     */
2143 
2144     /* first get start and end of "diagonal" columns */
2145     if (csize == PETSC_DECIDE) {
2146       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2147       if (mglobal == n) { /* square matrix */
2148 	nlocal = m;
2149       } else {
2150         nlocal = n/size + ((n % size) > rank);
2151       }
2152     } else {
2153       nlocal = csize;
2154     }
2155     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2156     rstart = rend - nlocal;
2157     if (rank == size - 1 && rend != n) {
2158       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2159     }
2160 
2161     /* next, compute all the lengths */
2162     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2163     olens = dlens + m;
2164     for (i=0; i<m; i++) {
2165       jend = ii[i+1] - ii[i];
2166       olen = 0;
2167       dlen = 0;
2168       for (j=0; j<jend; j++) {
2169         if (*jj < rstart || *jj >= rend) olen++;
2170         else dlen++;
2171         jj++;
2172       }
2173       olens[i] = olen;
2174       dlens[i] = dlen;
2175     }
2176     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2177     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2178     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2179     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2180     ierr = PetscFree(dlens);CHKERRQ(ierr);
2181   } else {
2182     PetscInt ml,nl;
2183 
2184     M = *newmat;
2185     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2186     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2187     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2188     /*
2189          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2190        rather than the slower MatSetValues().
2191     */
2192     M->was_assembled = PETSC_TRUE;
2193     M->assembled     = PETSC_FALSE;
2194   }
2195   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2196   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2197   ii  = aij->i;
2198   jj  = aij->j;
2199   aa  = aij->a;
2200   for (i=0; i<m; i++) {
2201     row   = rstart + i;
2202     nz    = ii[i+1] - ii[i];
2203     cwork = jj;     jj += nz;
2204     vwork = aa;     aa += nz;
2205     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2206   }
2207 
2208   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2209   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2210   *newmat = M;
2211 
2212   /* save submatrix used in processor for next request */
2213   if (call ==  MAT_INITIAL_MATRIX) {
2214     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2215     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2216   }
2217 
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 EXTERN_C_BEGIN
2222 #undef __FUNCT__
2223 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2224 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2225 {
2226   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2227   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2228   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2229   const PetscInt *JJ;
2230   PetscScalar    *values;
2231   PetscErrorCode ierr;
2232 
2233   PetscFunctionBegin;
2234   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0, it is %D",I[0]);
2235   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2236   o_nnz = d_nnz + m;
2237 
2238   for (i=0; i<m; i++) {
2239     nnz     = I[i+1]- I[i];
2240     JJ      = J + I[i];
2241     nnz_max = PetscMax(nnz_max,nnz);
2242     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2243     for (j=0; j<nnz; j++) {
2244       if (*JJ >= cstart) break;
2245       JJ++;
2246     }
2247     d = 0;
2248     for (; j<nnz; j++) {
2249       if (*JJ++ >= cend) break;
2250       d++;
2251     }
2252     d_nnz[i] = d;
2253     o_nnz[i] = nnz - d;
2254   }
2255   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2256   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2257 
2258   if (v) values = (PetscScalar*)v;
2259   else {
2260     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2261     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2262   }
2263 
2264   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2265   for (i=0; i<m; i++) {
2266     ii   = i + rstart;
2267     nnz  = I[i+1]- I[i];
2268     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2269   }
2270   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2271   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2273 
2274   if (!v) {
2275     ierr = PetscFree(values);CHKERRQ(ierr);
2276   }
2277   PetscFunctionReturn(0);
2278 }
2279 EXTERN_C_END
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2283 /*@C
2284    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2285    (the default parallel PETSc format).
2286 
2287    Collective on MPI_Comm
2288 
2289    Input Parameters:
2290 +  B - the matrix
2291 .  i - the indices into j for the start of each local row (starts with zero)
2292 .  j - the global column indices for each local row (starts with zero); these must be sorted within each row
2293 -  v - optional values in the matrix
2294 
2295    Level: developer
2296 
2297 .keywords: matrix, aij, compressed row, sparse, parallel
2298 
2299 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2300 @*/
2301 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2302 {
2303   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2304 
2305   PetscFunctionBegin;
2306   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2307   if (f) {
2308     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2309   }
2310   PetscFunctionReturn(0);
2311 }
2312 
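/*
   A hedged usage sketch for MatMPIAIJSetPreallocationCSR() (compiled out; the function name
   SampleCSRPreallocation and the parameter mlocal are illustrative, not part of this source).
   Each process hands in its mlocal rows in CSR form: zero-based row offsets, global column
   indices sorted within each row, and (optionally) the numerical values.  The routine then
   preallocates, inserts the values and assembles the matrix, as in the implementation above.
   For simplicity the rows built here contain only the diagonal entry.
*/
#if 0
static PetscErrorCode SampleCSRPreallocation(MPI_Comm comm,PetscInt mlocal)
{
  PetscErrorCode ierr;
  Mat            B;
  PetscInt       *ii,*jj,i,rstart;
  PetscScalar    *vv;

  PetscFunctionBegin;
  ierr = MPI_Scan(&mlocal,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= mlocal;                                 /* global index of this process's first row */
  ierr = PetscMalloc((mlocal+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ierr = PetscMalloc((mlocal+1)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  ierr = PetscMalloc((mlocal+1)*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
  for (i=0; i<mlocal; i++) {                        /* one entry per row: the diagonal, trivially sorted */
    ii[i] = i; jj[i] = rstart + i; vv[i] = 1.0;
  }
  ii[mlocal] = mlocal;
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(B,ii,jj,vv);CHKERRQ(ierr);  /* preallocates, inserts and assembles */
  ierr = PetscFree(ii);CHKERRQ(ierr);
  ierr = PetscFree(jj);CHKERRQ(ierr);
  ierr = PetscFree(vv);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
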
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2315 /*@C
2316    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2317    (the default parallel PETSc format).  For good matrix assembly performance
2318    the user should preallocate the matrix storage by setting the parameters
2319    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2320    performance can be increased by more than a factor of 50.
2321 
2322    Collective on MPI_Comm
2323 
2324    Input Parameters:
2325 +  A - the matrix
2326 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2327            (same value is used for all local rows)
2328 .  d_nnz - array containing the number of nonzeros in the various rows of the
2329            DIAGONAL portion of the local submatrix (possibly different for each row)
2330            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2331            The size of this array is equal to the number of local rows, i.e 'm'.
2332            You must leave room for the diagonal entry even if it is zero.
2333 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2334            submatrix (same value is used for all local rows).
2335 -  o_nnz - array containing the number of nonzeros in the various rows of the
2336            OFF-DIAGONAL portion of the local submatrix (possibly different for
2337            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2338            structure. The size of this array is equal to the number
2339            of local rows, i.e 'm'.
2340 
2341    If the *_nnz parameter is given then the *_nz parameter is ignored
2342 
2343    The AIJ format (also called the Yale sparse matrix format or
2344    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2345    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2346 
2347    The parallel matrix is partitioned such that the first m0 rows belong to
2348    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2349    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2350 
2351    The DIAGONAL portion of the local submatrix of a processor can be defined
2352    as the submatrix which is obtained by extracting the part corresponding
2353    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2354    first row that belongs to the processor, and r2 is the last row belonging
2355    to this processor. This is a square mxm matrix. The remaining portion
2356    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2357 
2358    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2359 
2360    Example usage:
2361 
2362    Consider the following 8x8 matrix with 34 non-zero values, that is
2363    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2364    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2365    as follows:
2366 
2367 .vb
2368             1  2  0  |  0  3  0  |  0  4
2369     Proc0   0  5  6  |  7  0  0  |  8  0
2370             9  0 10  | 11  0  0  | 12  0
2371     -------------------------------------
2372            13  0 14  | 15 16 17  |  0  0
2373     Proc1   0 18  0  | 19 20 21  |  0  0
2374             0  0  0  | 22 23  0  | 24  0
2375     -------------------------------------
2376     Proc2  25 26 27  |  0  0 28  | 29  0
2377            30  0  0  | 31 32 33  |  0 34
2378 .ve
2379 
2380    This can be represented as a collection of submatrices as:
2381 
2382 .vb
2383       A B C
2384       D E F
2385       G H I
2386 .ve
2387 
2388    Where the submatrices A,B,C are owned by proc0, D,E,F are
2389    owned by proc1, G,H,I are owned by proc2.
2390 
2391    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2392    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2393    The 'M','N' parameters are 8,8, and have the same values on all procs.
2394 
2395    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2396    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2397    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2398    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2399    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2400    matrix, and [DF] as another SeqAIJ matrix.
2401 
2402    When d_nz, o_nz parameters are specified, d_nz storage elements are
2403    allocated for every row of the local diagonal submatrix, and o_nz
2404    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2405    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2406    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2407    In this case, the values of d_nz,o_nz are:
2408 .vb
2409      proc0 : d_nz = 2, o_nz = 2
2410      proc1 : d_nz = 3, o_nz = 2
2411      proc2 : d_nz = 1, o_nz = 4
2412 .ve
2413    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2414    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2415    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2416    34 values.
2417 
2418    When d_nnz, o_nnz parameters are specified, the storage is specified
2419    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2420    In the above case the values for d_nnz,o_nnz are:
2421 .vb
2422      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2423      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2424      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2425 .ve
2426    Here the space allocated is the sum of all the above values, i.e. 34, and
2427    hence the preallocation is perfect.
2428 
2429    Level: intermediate
2430 
2431 .keywords: matrix, aij, compressed row, sparse, parallel
2432 
2433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2434           MPIAIJ
2435 @*/
2436 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2437 {
2438   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2442   if (f) {
2443     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2444   }
2445   PetscFunctionReturn(0);
2446 }
2447 
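/*
   A minimal sketch (compiled out; SamplePreallocationProc0 is a hypothetical name) of the
   MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() construction route documented above,
   written from the point of view of proc0 of the three-process 8x8 example: 3 local rows,
   3 "diagonal" columns, and the per-row counts d_nnz = [2,2,2], o_nnz = [2,2,2].  The other
   processes make the analogous calls with their own local sizes and counts.
*/
#if 0
static PetscErrorCode SamplePreallocationProc0(MPI_Comm comm)
{
  PetscErrorCode ierr;
  Mat            A;
  PetscInt       d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);                        /* 3 local rows/columns of the 8x8 matrix */
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);  /* d_nz/o_nz ignored since the arrays are given */
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
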
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatCreateMPIAIJ"
2450 /*@C
2451    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2452    (the default parallel PETSc format).  For good matrix assembly performance
2453    the user should preallocate the matrix storage by setting the parameters
2454    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2455    performance can be increased by more than a factor of 50.
2456 
2457    Collective on MPI_Comm
2458 
2459    Input Parameters:
2460 +  comm - MPI communicator
2461 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2462            This value should be the same as the local size used in creating the
2463            y vector for the matrix-vector product y = Ax.
2464 .  n - This value should be the same as the local size used in creating the
2465        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2466        calculated if N is given) For square matrices n is almost always m.
2467 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2468 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2469 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2470            (same value is used for all local rows)
2471 .  d_nnz - array containing the number of nonzeros in the various rows of the
2472            DIAGONAL portion of the local submatrix (possibly different for each row)
2473            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2474            The size of this array is equal to the number of local rows, i.e 'm'.
2475            You must leave room for the diagonal entry even if it is zero.
2476 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2477            submatrix (same value is used for all local rows).
2478 -  o_nnz - array containing the number of nonzeros in the various rows of the
2479            OFF-DIAGONAL portion of the local submatrix (possibly different for
2480            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2481            structure. The size of this array is equal to the number
2482            of local rows, i.e 'm'.
2483 
2484    Output Parameter:
2485 .  A - the matrix
2486 
2487    Notes:
2488    If the *_nnz parameter is given then the *_nz parameter is ignored
2489 
2490    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2491    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2492    storage requirements for this matrix.
2493 
2494    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2495    processor then it must be used on all processors that share the object for
2496    that argument.
2497 
2498    The user MUST specify either the local or global matrix dimensions
2499    (possibly both).
2500 
2501    The parallel matrix is partitioned such that the first m0 rows belong to
2502    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2503    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2504 
2505    The DIAGONAL portion of the local submatrix of a processor can be defined
2506    as the submatrix which is obtained by extracting the part corresponding
2507    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2508    first row that belongs to the processor, and r2 is the last row belonging
2509    to this processor. This is a square mxm matrix. The remaining portion
2510    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2511 
2512    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2513 
2514    When calling this routine with a single process communicator, a matrix of
2515    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2516    type of communicator, use the construction mechanism:
2517      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2518 
2519    By default, this format uses inodes (identical nodes) when possible.
2520    We search for consecutive rows with the same nonzero structure, thereby
2521    reusing matrix information to achieve increased efficiency.
2522 
2523    Options Database Keys:
2524 +  -mat_no_inode  - Do not use inodes
2525 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2526 -  -mat_aij_oneindex - Internally use indexing starting at 1
2527         rather than 0.  Note that when calling MatSetValues(),
2528         the user still MUST index entries starting at 0!
2529 
2530 
2531    Example usage:
2532 
2533    Consider the following 8x8 matrix with 34 non-zero values, that is
2534    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2535    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2536    as follows:
2537 
2538 .vb
2539             1  2  0  |  0  3  0  |  0  4
2540     Proc0   0  5  6  |  7  0  0  |  8  0
2541             9  0 10  | 11  0  0  | 12  0
2542     -------------------------------------
2543            13  0 14  | 15 16 17  |  0  0
2544     Proc1   0 18  0  | 19 20 21  |  0  0
2545             0  0  0  | 22 23  0  | 24  0
2546     -------------------------------------
2547     Proc2  25 26 27  |  0  0 28  | 29  0
2548            30  0  0  | 31 32 33  |  0 34
2549 .ve
2550 
2551    This can be represented as a collection of submatrices as:
2552 
2553 .vb
2554       A B C
2555       D E F
2556       G H I
2557 .ve
2558 
2559    Where the submatrices A,B,C are owned by proc0, D,E,F are
2560    owned by proc1, G,H,I are owned by proc2.
2561 
2562    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2563    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2564    The 'M','N' parameters are 8,8, and have the same values on all procs.
2565 
2566    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2567    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2568    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2569    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2570    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2571    matrix, and [DF] as another SeqAIJ matrix.
2572 
2573    When d_nz, o_nz parameters are specified, d_nz storage elements are
2574    allocated for every row of the local diagonal submatrix, and o_nz
2575    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2576    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2577    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2578    In this case, the values of d_nz,o_nz are:
2579 .vb
2580      proc0 : d_nz = 2, o_nz = 2
2581      proc1 : d_nz = 3, o_nz = 2
2582      proc2 : d_nz = 1, o_nz = 4
2583 .ve
2584    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2585    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2586    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2587    34 values.
2588 
2589    When d_nnz, o_nnz parameters are specified, the storage is specified
2590    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2591    In the above case the values for d_nnz,o_nnz are:
2592 .vb
2593      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2594      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2595      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2596 .ve
2597    Here the space allocated is the sum of all the above values, i.e. 34, and
2598    hence the preallocation is perfect.
2599 
2600    Level: intermediate
2601 
2602 .keywords: matrix, aij, compressed row, sparse, parallel
2603 
2604 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2605           MPIAIJ
2606 @*/
2607 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2608 {
2609   PetscErrorCode ierr;
2610   PetscMPIInt    size;
2611 
2612   PetscFunctionBegin;
2613   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2614   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2615   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2616   if (size > 1) {
2617     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2618     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2619   } else {
2620     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2621     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2622   }
2623   PetscFunctionReturn(0);
2624 }
2625 
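/*
   A hedged sketch (compiled out; SampleCreateMPIAIJ and mlocal are illustrative names) of the
   one-call convenience route MatCreateMPIAIJ() documented above, here with constant per-row
   estimates d_nz/o_nz rather than exact d_nnz/o_nnz arrays.  A square matrix is created with
   mlocal local rows and columns; only the diagonal is set, so the estimates are generous.
*/
#if 0
static PetscErrorCode SampleCreateMPIAIJ(MPI_Comm comm,PetscInt mlocal)
{
  PetscErrorCode ierr;
  Mat            A;
  PetscInt       row,rstart,rend;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatSetValues(A,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);  /* diagonal entry, global indices */
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
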
2626 #undef __FUNCT__
2627 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2628 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2629 {
2630   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2631 
2632   PetscFunctionBegin;
2633   *Ad     = a->A;
2634   *Ao     = a->B;
2635   *colmap = a->garray;
2636   PetscFunctionReturn(0);
2637 }
2638 
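/*
   A small sketch (compiled out; SamplePrintOffDiagColumns is a hypothetical name) showing how
   the colmap array returned by MatMPIAIJGetSeqAIJ() above is interpreted: local column j of
   the off-diagonal block Ao corresponds to global column colmap[j] of the parallel matrix,
   i.e. it is the a->garray used throughout these routines.  A is assumed to be an assembled
   MPIAIJ matrix.
*/
#if 0
static PetscErrorCode SamplePrintOffDiagColumns(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  PetscInt       *colmap,mB,nB,j;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetSize(Ao,&mB,&nB);CHKERRQ(ierr);   /* Ao is sequential, so these are its local sizes */
  for (j=0; j<nB; j++) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"local off-diagonal column %D is global column %D\n",j,colmap[j]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
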
2639 #undef __FUNCT__
2640 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2641 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2642 {
2643   PetscErrorCode ierr;
2644   PetscInt       i;
2645   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2646 
2647   PetscFunctionBegin;
2648   if (coloring->ctype == IS_COLORING_LOCAL) {
2649     ISColoringValue *allcolors,*colors;
2650     ISColoring      ocoloring;
2651 
2652     /* set coloring for diagonal portion */
2653     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2654 
2655     /* set coloring for off-diagonal portion */
2656     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2657     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2658     for (i=0; i<a->B->n; i++) {
2659       colors[i] = allcolors[a->garray[i]];
2660     }
2661     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2662     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2663     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2664     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2665   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2666     ISColoringValue *colors;
2667     PetscInt             *larray;
2668     ISColoring      ocoloring;
2669 
2670     /* set coloring for diagonal portion */
2671     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2672     for (i=0; i<a->A->n; i++) {
2673       larray[i] = i + a->cstart;
2674     }
2675     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2676     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2677     for (i=0; i<a->A->n; i++) {
2678       colors[i] = coloring->colors[larray[i]];
2679     }
2680     ierr = PetscFree(larray);CHKERRQ(ierr);
2681     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2682     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2683     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2684 
2685     /* set coloring for off-diagonal portion */
2686     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2687     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2688     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2689     for (i=0; i<a->B->n; i++) {
2690       colors[i] = coloring->colors[larray[i]];
2691     }
2692     ierr = PetscFree(larray);CHKERRQ(ierr);
2693     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2694     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2695     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2696   } else {
2697     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2698   }
2699 
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 #if defined(PETSC_HAVE_ADIC)
2704 #undef __FUNCT__
2705 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2706 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2707 {
2708   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2709   PetscErrorCode ierr;
2710 
2711   PetscFunctionBegin;
2712   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2713   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2714   PetscFunctionReturn(0);
2715 }
2716 #endif
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2720 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2721 {
2722   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2723   PetscErrorCode ierr;
2724 
2725   PetscFunctionBegin;
2726   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2727   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2728   PetscFunctionReturn(0);
2729 }
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatMerge"
2733 /*@C
2734       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2735                  matrices from each processor
2736 
2737     Collective on MPI_Comm
2738 
2739    Input Parameters:
2740 +    comm - the communicator the parallel matrix will live on
2741 .    inmat - the input sequential matrix (one per process)
2742 .    n - number of local columns (or PETSC_DECIDE)
2743 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2744 
2745    Output Parameter:
2746 .    outmat - the parallel matrix generated
2747 
2748     Level: advanced
2749 
2750    Notes: The number of columns of the matrix on EACH processor MUST be the same; the input matrix inmat is destroyed by this routine.
2751 
2752 @*/
2753 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2754 {
2755   PetscErrorCode ierr;
2756   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2757   PetscInt       *indx;
2758   PetscScalar    *values;
2759   PetscMap       columnmap,rowmap;
2760 
2761   PetscFunctionBegin;
2762   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2763   /*
2764   PetscMPIInt       rank;
2765   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2766   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2767   */
2768   if (scall == MAT_INITIAL_MATRIX){
2769     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2770     if (n == PETSC_DECIDE){
2771       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2772       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2773       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2774       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2775       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2776     }
2777 
2778     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2779     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2780     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2781     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2782     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2783 
2784     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2785     for (i=0;i<m;i++) {
2786       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2787       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2788       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2789     }
2790     /* This routine will ONLY return MPIAIJ type matrix */
2791     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2792     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2793     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2794     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2795     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2796 
2797   } else if (scall == MAT_REUSE_MATRIX){
2798     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2799   } else {
2800     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2801   }
2802 
2803   for (i=0;i<m;i++) {
2804     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2805     I    = i + rstart;
2806     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2807     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2808   }
2809   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2810   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2811   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2812 
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatFileSplit"
2818 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2819 {
2820   PetscErrorCode    ierr;
2821   PetscMPIInt       rank;
2822   PetscInt          m,N,i,rstart,nnz;
2823   size_t            len;
2824   const PetscInt    *indx;
2825   PetscViewer       out;
2826   char              *name;
2827   Mat               B;
2828   const PetscScalar *values;
2829 
2830   PetscFunctionBegin;
2831   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2832   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2833   /* Should this be the type of the diagonal block of A? */
2834   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2835   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2836   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2837   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2838   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2839   for (i=0;i<m;i++) {
2840     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2841     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2842     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2843   }
2844   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2845   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2846 
2847   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2848   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2849   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2850   sprintf(name,"%s.%d",outfile,rank);
2851   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2852   ierr = PetscFree(name);CHKERRQ(ierr);
2853   ierr = MatView(B,out);CHKERRQ(ierr);
2854   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2855   ierr = MatDestroy(B);CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2862 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2863 {
2864   PetscErrorCode       ierr;
2865   Mat_Merge_SeqsToMPI  *merge;
2866   PetscObjectContainer container;
2867 
2868   PetscFunctionBegin;
2869   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2870   if (container) {
2871     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2872     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2873     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2874     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2875     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2876     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2877     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2878     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2879     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2880     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2881     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2882     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2883 
2884     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2885     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2886     ierr = PetscFree(merge);CHKERRQ(ierr);
2887   }
2888 
2889   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 #include "src/mat/utils/freespace.h"
2894 #include "petscbt.h"
2895 static PetscEvent logkey_seqstompinum = 0;
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2898 /*@C
2899       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2900                  matrices from each processor
2901 
2902     Collective on MPI_Comm
2903 
2904    Input Parameters:
2905 +    comm - the communicator the parallel matrix will live on
2906 .    seqmat - the input sequential matrix on each processor
2907 .    m - number of local rows (or PETSC_DECIDE)
2908 .    n - number of local columns (or PETSC_DECIDE)
2909 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2910 
2911    Output Parameter:
2912 .    mpimat - the parallel matrix generated
2913 
2914     Level: advanced
2915 
2916    Notes:
2917      The dimensions of the sequential matrix on each processor MUST be the same.
2918      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2919      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2920 @*/
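/*
   A minimal usage sketch of the driver MatMerge_SeqsToMPI() (defined further below);
   Aseq and C are hypothetical names, and Aseq is an assembled SeqAIJ matrix with
   identical dimensions on every process.

     Mat C;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... refill Aseq with new values (same nonzero pattern) ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/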
2921 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2922 {
2923   PetscErrorCode       ierr;
2924   MPI_Comm             comm=mpimat->comm;
2925   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2926   PetscMPIInt          size,rank,taga,*len_s;
2927   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2928   PetscInt             proc,m;
2929   PetscInt             **buf_ri,**buf_rj;
2930   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2931   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2932   MPI_Request          *s_waits,*r_waits;
2933   MPI_Status           *status;
2934   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2935   Mat_Merge_SeqsToMPI  *merge;
2936   PetscObjectContainer container;
2937 
2938   PetscFunctionBegin;
2939   if (!logkey_seqstompinum) {
2940     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2941   }
2942   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2943 
2944   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2945   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2946 
2947   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2948   if (container) {
2949     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2950   }
2951   bi     = merge->bi;
2952   bj     = merge->bj;
2953   buf_ri = merge->buf_ri;
2954   buf_rj = merge->buf_rj;
2955 
2956   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2957   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2958   len_s  = merge->len_s;
2959 
2960   /* send and recv matrix values */
2961   /*-----------------------------*/
2962   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2963   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2964 
2965   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2966   for (proc=0,k=0; proc<size; proc++){
2967     if (!len_s[proc]) continue;
2968     i = owners[proc];
2969     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2970     k++;
2971   }
2972 
2973   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
2974   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
2975   ierr = PetscFree(status);CHKERRQ(ierr);
2976 
2977   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2978   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2979 
2980   /* insert mat values of mpimat */
2981   /*----------------------------*/
2982   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2983   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2984   nextrow = buf_ri_k + merge->nrecv;
2985   nextai  = nextrow + merge->nrecv;
2986 
2987   for (k=0; k<merge->nrecv; k++){
2988     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2989     nrows = *(buf_ri_k[k]);
2990     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2991     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2992   }
2993 
2994   /* set values of ba */
2995   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2996   for (i=0; i<m; i++) {
2997     arow = owners[rank] + i;
2998     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2999     bnzi = bi[i+1] - bi[i];
3000     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3001 
3002     /* add local non-zero vals of this proc's seqmat into ba */
3003     anzi = ai[arow+1] - ai[arow];
3004     aj   = a->j + ai[arow];
3005     aa   = a->a + ai[arow];
3006     nextaj = 0;
3007     for (j=0; nextaj<anzi; j++){
3008       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3009         ba_i[j] += aa[nextaj++];
3010       }
3011     }
3012 
3013     /* add received vals into ba */
3014     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3015       /* i-th row */
3016       if (i == *nextrow[k]) {
3017         anzi = *(nextai[k]+1) - *nextai[k];
3018         aj   = buf_rj[k] + *(nextai[k]);
3019         aa   = abuf_r[k] + *(nextai[k]);
3020         nextaj = 0;
3021         for (j=0; nextaj<anzi; j++){
3022           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3023             ba_i[j] += aa[nextaj++];
3024           }
3025         }
3026         nextrow[k]++; nextai[k]++;
3027       }
3028     }
3029     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3030   }
3031   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3032   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033 
3034   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3035   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3036   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3037   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3038   PetscFunctionReturn(0);
3039 }
3040 
3041 static PetscEvent logkey_seqstompisym = 0;
3042 #undef __FUNCT__
3043 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3044 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3045 {
3046   PetscErrorCode       ierr;
3047   Mat                  B_mpi;
3048   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3049   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3050   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3051   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3052   PetscInt             len,proc,*dnz,*onz;
3053   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3054   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3055   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3056   MPI_Status           *status;
3057   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3058   PetscBT              lnkbt;
3059   Mat_Merge_SeqsToMPI  *merge;
3060   PetscObjectContainer container;
3061 
3062   PetscFunctionBegin;
3063   if (!logkey_seqstompisym) {
3064     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3065   }
3066   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3067 
3068   /* make sure it is a PETSc comm */
3069   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3070   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3071   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3072 
3073   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3074   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3075 
3076   /* determine row ownership */
3077   /*---------------------------------------------------------*/
3078   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3079   if (m == PETSC_DECIDE) {
3080     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3081   } else {
3082     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3083   }
3084   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3085   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3086   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3087 
3088   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3089   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3090 
3091   /* determine the number of messages to send, their lengths */
3092   /*---------------------------------------------------------*/
3093   len_s  = merge->len_s;
3094 
3095   len = 0;  /* length of buf_si[] */
3096   merge->nsend = 0;
3097   for (proc=0; proc<size; proc++){
3098     len_si[proc] = 0;
3099     if (proc == rank){
3100       len_s[proc] = 0;
3101     } else {
3102       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3103       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
3104     }
3105     if (len_s[proc]) {
3106       merge->nsend++;
3107       nrows = 0;
3108       for (i=owners[proc]; i<owners[proc+1]; i++){
3109         if (ai[i+1] > ai[i]) nrows++;
3110       }
3111       len_si[proc] = 2*(nrows+1);
3112       len += len_si[proc];
3113     }
3114   }
3115 
3116   /* determine the number and length of messages to receive for ij-structure */
3117   /*-------------------------------------------------------------------------*/
3118   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3119   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3120 
3121   /* post the Irecv of j-structure */
3122   /*-------------------------------*/
3123   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3124   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3125 
3126   /* post the Isend of j-structure */
3127   /*--------------------------------*/
3128   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3129   sj_waits = si_waits + merge->nsend;
3130 
3131   for (proc=0, k=0; proc<size; proc++){
3132     if (!len_s[proc]) continue;
3133     i = owners[proc];
3134     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3135     k++;
3136   }
3137 
3138   /* receives and sends of j-structure are complete */
3139   /*------------------------------------------------*/
3140   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3141   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3142 
3143   /* send and recv i-structure */
3144   /*---------------------------*/
3145   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3146   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3147 
3148   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3149   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3150   for (proc=0,k=0; proc<size; proc++){
3151     if (!len_s[proc]) continue;
3152     /* form outgoing message for i-structure:
3153          buf_si[0]:                 nrows to be sent
3154                [1:nrows]:           row index (global)
3155                [nrows+1:2*nrows+1]: i-structure index
3156     */
3157     /*-------------------------------------------*/
3158     nrows = len_si[proc]/2 - 1;
3159     buf_si_i    = buf_si + nrows+1;
3160     buf_si[0]   = nrows;
3161     buf_si_i[0] = 0;
3162     nrows = 0;
3163     for (i=owners[proc]; i<owners[proc+1]; i++){
3164       anzi = ai[i+1] - ai[i];
3165       if (anzi) {
3166         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3167         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3168         nrows++;
3169       }
3170     }
3171     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3172     k++;
3173     buf_si += len_si[proc];
3174   }
3175 
3176   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3177   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3178 
3179   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3180   for (i=0; i<merge->nrecv; i++){
3181     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3182   }
3183 
3184   ierr = PetscFree(len_si);CHKERRQ(ierr);
3185   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3186   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3187   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3188   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3189   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3190   ierr = PetscFree(status);CHKERRQ(ierr);
3191 
3192   /* compute a local seq matrix in each processor */
3193   /*----------------------------------------------*/
3194   /* allocate bi array and free space for accumulating nonzero column info */
3195   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3196   bi[0] = 0;
3197 
3198   /* create and initialize a linked list */
3199   nlnk = N+1;
3200   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3201 
3202   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3204   len  = ai[owners[rank+1]] - ai[owners[rank]];
3205   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3206   current_space = free_space;
3207 
3208   /* determine symbolic info for each local row */
3209   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3210   nextrow = buf_ri_k + merge->nrecv;
3211   nextai  = nextrow + merge->nrecv;
3212   for (k=0; k<merge->nrecv; k++){
3213     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3214     nrows = *buf_ri_k[k];
3215     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3216     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3217   }
3218 
3219   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3220   len = 0;
3221   for (i=0;i<m;i++) {
3222     bnzi   = 0;
3223     /* add local non-zero cols of this proc's seqmat into lnk */
3224     arow   = owners[rank] + i;
3225     anzi   = ai[arow+1] - ai[arow];
3226     aj     = a->j + ai[arow];
3227     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3228     bnzi += nlnk;
3229     /* add received col data into lnk */
3230     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3231       if (i == *nextrow[k]) { /* i-th row */
3232         anzi = *(nextai[k]+1) - *nextai[k];
3233         aj   = buf_rj[k] + *nextai[k];
3234         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3235         bnzi += nlnk;
3236         nextrow[k]++; nextai[k]++;
3237       }
3238     }
3239     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3240 
3241     /* if free space is not available, make more free space */
3242     if (current_space->local_remaining<bnzi) {
3243       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3244       nspacedouble++;
3245     }
3246     /* copy data into free space, then initialize lnk */
3247     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3248     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3249 
3250     current_space->array           += bnzi;
3251     current_space->local_used      += bnzi;
3252     current_space->local_remaining -= bnzi;
3253 
3254     bi[i+1] = bi[i] + bnzi;
3255   }
3256 
3257   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3258 
3259   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3260   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3261   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3262 
3263   /* create symbolic parallel matrix B_mpi */
3264   /*---------------------------------------*/
3265   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3266   if (n==PETSC_DECIDE) {
3267     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3268   } else {
3269     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3270   }
3271   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3272   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3273   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3274 
3275   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3276   B_mpi->assembled     = PETSC_FALSE;
3277   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3278   merge->bi            = bi;
3279   merge->bj            = bj;
3280   merge->buf_ri        = buf_ri;
3281   merge->buf_rj        = buf_rj;
3282   merge->coi           = PETSC_NULL;
3283   merge->coj           = PETSC_NULL;
3284   merge->owners_co     = PETSC_NULL;
3285 
3286   /* attach the supporting struct to B_mpi for reuse */
3287   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3288   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3289   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3290   *mpimat = B_mpi;
3291 
3292   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3293   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3294   PetscFunctionReturn(0);
3295 }
3296 
3297 static PetscEvent logkey_seqstompi = 0;
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatMerge_SeqsToMPI"
3300 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3301 {
3302   PetscErrorCode   ierr;
3303 
3304   PetscFunctionBegin;
3305   if (!logkey_seqstompi) {
3306     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3307   }
3308   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3309   if (scall == MAT_INITIAL_MATRIX){
3310     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3311   }
3312   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3313   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3314   PetscFunctionReturn(0);
3315 }
3316 static PetscEvent logkey_getlocalmat = 0;
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatGetLocalMat"
3319 /*@C
3320      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
3321 
3322     Not Collective
3323 
3324    Input Parameters:
3325 +    A - the matrix
3326 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3327 
3328    Output Parameter:
3329 .    A_loc - the local sequential matrix generated
3330 
3331     Level: developer
3332 
3333 @*/
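/*
   A minimal usage sketch; A is an assembled MPIAIJ matrix and Aloc is a hypothetical name.

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... values of A change but its nonzero pattern does not ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/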
3334 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3335 {
3336   PetscErrorCode  ierr;
3337   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3338   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3339   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3340   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3341   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3342   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3343 
3344   PetscFunctionBegin;
3345   if (!logkey_getlocalmat) {
3346     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3347   }
3348   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3349   if (scall == MAT_INITIAL_MATRIX){
3350     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3351     ci[0] = 0;
3352     for (i=0; i<am; i++){
3353       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3354     }
3355     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3356     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3357     k = 0;
3358     for (i=0; i<am; i++) {
3359       ncols_o = bi[i+1] - bi[i];
3360       ncols_d = ai[i+1] - ai[i];
3361       /* off-diagonal portion of A */
3362       for (jo=0; jo<ncols_o; jo++) {
3363         col = cmap[*bj];
3364         if (col >= cstart) break;
3365         cj[k]   = col; bj++;
3366         ca[k++] = *ba++;
3367       }
3368       /* diagonal portion of A */
3369       for (j=0; j<ncols_d; j++) {
3370         cj[k]   = cstart + *aj++;
3371         ca[k++] = *aa++;
3372       }
3373       /* off-diagonal portion of A */
3374       for (j=jo; j<ncols_o; j++) {
3375         cj[k]   = cmap[*bj++];
3376         ca[k++] = *ba++;
3377       }
3378     }
3379     /* put together the new matrix */
3380     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3381     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3382     /* Since these are PETSc arrays, change flags to free them as necessary. */
3383     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3384     mat->freedata = PETSC_TRUE;
3385     mat->nonew    = 0;
3386   } else if (scall == MAT_REUSE_MATRIX){
3387     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3388     ci = mat->i; cj = mat->j; ca = mat->a;
3389     for (i=0; i<am; i++) {
3390       /* off-diagonal portion of A */
3391       ncols_o = bi[i+1] - bi[i];
3392       for (jo=0; jo<ncols_o; jo++) {
3393         col = cmap[*bj];
3394         if (col >= cstart) break;
3395         *ca++ = *ba++; bj++;
3396       }
3397       /* diagonal portion of A */
3398       ncols_d = ai[i+1] - ai[i];
3399       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3400       /* off-diagonal portion of A */
3401       for (j=jo; j<ncols_o; j++) {
3402         *ca++ = *ba++; bj++;
3403       }
3404     }
3405   } else {
3406     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3407   }
3408 
3409   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3410   PetscFunctionReturn(0);
3411 }
3412 
3413 static PetscEvent logkey_getlocalmatcondensed = 0;
3414 #undef __FUNCT__
3415 #define __FUNCT__ "MatGetLocalMatCondensed"
3416 /*@C
3417      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and the NON-ZERO columns of A
3418 
3419     Not Collective
3420 
3421    Input Parameters:
3422 +    A - the matrix
3423 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3424 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3425 
3426    Output Parameter:
3427 .    A_loc - the local sequential matrix generated
3428 
3429     Level: developer
3430 
3431 @*/
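/*
   A minimal usage sketch; A is an assembled MPIAIJ matrix, Aloc is a hypothetical
   name, and PETSC_NULL is passed so the routine builds the row/column index sets itself.

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/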
3432 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3433 {
3434   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3435   PetscErrorCode    ierr;
3436   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3437   IS                isrowa,iscola;
3438   Mat               *aloc;
3439 
3440   PetscFunctionBegin;
3441   if (!logkey_getlocalmatcondensed) {
3442     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3443   }
3444   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3445   if (!row){
3446     start = a->rstart; end = a->rend;
3447     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3448   } else {
3449     isrowa = *row;
3450   }
3451   if (!col){
3452     start = a->cstart;
3453     cmap  = a->garray;
3454     nzA   = a->A->n;
3455     nzB   = a->B->n;
3456     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3457     ncols = 0;
3458     for (i=0; i<nzB; i++) {
3459       if (cmap[i] < start) idx[ncols++] = cmap[i];
3460       else break;
3461     }
3462     imark = i;
3463     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3464     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3465     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3466     ierr = PetscFree(idx);CHKERRQ(ierr);
3467   } else {
3468     iscola = *col;
3469   }
3470   if (scall != MAT_INITIAL_MATRIX){
3471     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3472     aloc[0] = *A_loc;
3473   }
3474   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3475   *A_loc = aloc[0];
3476   ierr = PetscFree(aloc);CHKERRQ(ierr);
3477   if (!row){
3478     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3479   }
3480   if (!col){
3481     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3482   }
3483   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3484   PetscFunctionReturn(0);
3485 }
3486 
3487 static PetscEvent logkey_GetBrowsOfAcols = 0;
3488 #undef __FUNCT__
3489 #define __FUNCT__ "MatGetBrowsOfAcols"
3490 /*@C
3491     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3492 
3493     Collective on Mat
3494 
3495    Input Parameters:
3496 +    A,B - the matrices in mpiaij format
3497 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3498 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3499 
3500    Output Parameter:
3501 +    rowb, colb - index sets of rows and columns of B to extract
3502 .    brstart - row index of B_seq at which B's own local rows begin
3503 -    B_seq - the sequential matrix generated
3504 
3505     Level: developer
3506 
3507 @*/
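/*
   A minimal usage sketch; A and B are assembled MPIAIJ matrices with compatible
   layouts (A's column layout matches B's row layout); the remaining names are hypothetical.

     IS       rowb,colb;
     PetscInt brstart;
     Mat      Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ... values of B change but its nonzero pattern does not ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(Bseq);CHKERRQ(ierr);
*/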
3508 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3509 {
3510   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3511   PetscErrorCode    ierr;
3512   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3513   IS                isrowb,iscolb;
3514   Mat               *bseq;
3515 
3516   PetscFunctionBegin;
3517   if (a->cstart != b->rstart || a->cend != b->rend){
3518     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3519   }
3520   if (!logkey_GetBrowsOfAcols) {
3521     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3522   }
3523   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3524 
3525   if (scall == MAT_INITIAL_MATRIX){
3526     start = a->cstart;
3527     cmap  = a->garray;
3528     nzA   = a->A->n;
3529     nzB   = a->B->n;
3530     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3531     ncols = 0;
3532     for (i=0; i<nzB; i++) {  /* row < local row index */
3533       if (cmap[i] < start) idx[ncols++] = cmap[i];
3534       else break;
3535     }
3536     imark = i;
3537     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3538     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3539     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3540     ierr = PetscFree(idx);CHKERRQ(ierr);
3541     *brstart = imark;
3542     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3543   } else {
3544     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3545     isrowb = *rowb; iscolb = *colb;
3546     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3547     bseq[0] = *B_seq;
3548   }
3549   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3550   *B_seq = bseq[0];
3551   ierr = PetscFree(bseq);CHKERRQ(ierr);
3552   if (!rowb){
3553     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3554   } else {
3555     *rowb = isrowb;
3556   }
3557   if (!colb){
3558     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3559   } else {
3560     *colb = iscolb;
3561   }
3562   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 static PetscEvent logkey_GetBrowsOfAocols = 0;
3567 #undef __FUNCT__
3568 #define __FUNCT__ "MatGetBrowsOfAoCols"
3569 /*@C
3570     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3571     of the OFF-DIAGONAL portion of the local part of A
3572 
3573     Collective on Mat
3574 
3575    Input Parameters:
3576 +    A,B - the matrices in mpiaij format
3577 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3578 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3579 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3580 
3581    Output Parameter:
3582 .    B_oth - the sequential matrix generated
3583 
3584     Level: developer
3585 
3586 @*/
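/*
   A minimal usage sketch; A and B are assembled MPIAIJ matrices with compatible
   layouts, the remaining names are hypothetical.  Freeing the returned work arrays
   at the end is assumed here to be the caller's responsibility.

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         Both;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     ... values of B change but its nonzero pattern does not ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(Both);CHKERRQ(ierr);
*/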
3587 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3588 {
3589   VecScatter_MPI_General *gen_to,*gen_from;
3590   PetscErrorCode         ierr;
3591   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3592   Mat_SeqAIJ             *b_oth;
3593   VecScatter             ctx=a->Mvctx;
3594   MPI_Comm               comm=ctx->comm;
3595   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3596   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3597   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3598   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3599   MPI_Request            *rwaits,*swaits;
3600   MPI_Status             *sstatus,rstatus;
3601   PetscInt               *cols;
3602   PetscScalar            *vals;
3603   PetscMPIInt            j;
3604 
3605   PetscFunctionBegin;
3606   if (a->cstart != b->rstart || a->cend != b->rend){
3607     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3608   }
3609   if (!logkey_GetBrowsOfAocols) {
3610     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3611   }
3612   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3613   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3614 
3615   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3616   rvalues  = gen_from->values; /* holds the lengths of receiving rows */
3617   svalues  = gen_to->values;   /* holds the lengths of sending rows */
3618   svalues  = gen_to->values;   /* holds the length of receiving row */
3619   nrecvs   = gen_from->n;
3620   nsends   = gen_to->n;
3621   rwaits   = gen_from->requests;
3622   swaits   = gen_to->requests;
3623   srow     = gen_to->indices;   /* local row index to be sent */
3624   rstarts  = gen_from->starts;
3625   sstarts  = gen_to->starts;
3626   rprocs   = gen_from->procs;
3627   sprocs   = gen_to->procs;
3628   sstatus  = gen_to->sstatus;
3629 
3630   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3631   if (scall == MAT_INITIAL_MATRIX){
3632     /* i-array */
3633     /*---------*/
3634     /*  post receives */
3635     for (i=0; i<nrecvs; i++){
3636       rowlen = (PetscInt*)rvalues + rstarts[i];
3637       nrows = rstarts[i+1]-rstarts[i];
3638       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3639     }
3640 
3641     /* pack the outgoing message */
3642     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3643     rstartsj = sstartsj + nsends +1;
3644     sstartsj[0] = 0;  rstartsj[0] = 0;
3645     len = 0; /* total length of j or a array to be sent */
3646     k = 0;
3647     for (i=0; i<nsends; i++){
3648       rowlen = (PetscInt*)svalues + sstarts[i];
3649       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3650       for (j=0; j<nrows; j++) {
3651         row = srow[k] + b->rowners[rank]; /* global row idx */
3652         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3653         len += rowlen[j];
3654         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3655         k++;
3656       }
3657       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3658       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3659     }
3660     /* recvs and sends of i-array are completed */
3661     i = nrecvs;
3662     while (i--) {
3663       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3664     }
3665     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3666     /* allocate buffers for sending j and a arrays */
3667     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3668     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3669 
3670     /* create i-array of B_oth */
3671     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3672     b_othi[0] = 0;
3673     len = 0; /* total length of j or a array to be received */
3674     k = 0;
3675     for (i=0; i<nrecvs; i++){
3676       rowlen = (PetscInt*)rvalues + rstarts[i];
3677       nrows = rstarts[i+1]-rstarts[i];
3678       for (j=0; j<nrows; j++) {
3679         b_othi[k+1] = b_othi[k] + rowlen[j];
3680         len += rowlen[j]; k++;
3681       }
3682       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3683     }
3684 
3685     /* allocate space for j and a arrays of B_oth */
3686     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3687     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3688 
3689     /* j-array */
3690     /*---------*/
3691     /*  post receives of j-array */
3692     for (i=0; i<nrecvs; i++){
3693       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3694       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3695     }
3696     k = 0;
3697     for (i=0; i<nsends; i++){
3698       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3699       bufJ = bufj+sstartsj[i];
3700       for (j=0; j<nrows; j++) {
3701         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3702         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3703         for (l=0; l<ncols; l++){
3704           *bufJ++ = cols[l];
3705         }
3706         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3707       }
3708       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3709     }
3710 
3711     /* recvs and sends of j-array are completed */
3712     i = nrecvs;
3713     while (i--) {
3714       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3715     }
3716     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3717   } else if (scall == MAT_REUSE_MATRIX){
3718     sstartsj = *startsj;
3719     rstartsj = sstartsj + nsends +1;
3720     bufa     = *bufa_ptr;
3721     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3722     b_otha   = b_oth->a;
3723   } else {
3724     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3725   }
3726 
3727   /* a-array */
3728   /*---------*/
3729   /*  post receives of a-array */
3730   for (i=0; i<nrecvs; i++){
3731     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3732     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3733   }
3734   k = 0;
3735   for (i=0; i<nsends; i++){
3736     nrows = sstarts[i+1]-sstarts[i];
3737     bufA = bufa+sstartsj[i];
3738     for (j=0; j<nrows; j++) {
3739       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3740       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3741       for (l=0; l<ncols; l++){
3742         *bufA++ = vals[l];
3743       }
3744       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3745 
3746     }
3747     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3748   }
3749   /* recvs and sends of a-array are completed */
3750   i = nrecvs;
3751   while (i--) {
3752     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3753   }
3754   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3755 
3756   if (scall == MAT_INITIAL_MATRIX){
3757     /* put together the new matrix */
3758     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3759 
3760     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3761     /* Since these are PETSc arrays, change flags to free them as necessary. */
3762     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3763     b_oth->freedata = PETSC_TRUE;
3764     b_oth->nonew    = 0;
3765 
3766     ierr = PetscFree(bufj);CHKERRQ(ierr);
3767     if (!startsj || !bufa_ptr){
3768       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3769       ierr = PetscFree(bufa);CHKERRQ(ierr);
3770     } else {
3771       *startsj  = sstartsj;
3772       *bufa_ptr = bufa;
3773     }
3774   }
3775   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3776 
3777   PetscFunctionReturn(0);
3778 }
3779 
3780 /*MC
3781    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3782 
3783    Options Database Keys:
3784 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3785 
3786   Level: beginner
3787 
3788 .seealso: MatCreateMPIAIJ
3789 M*/
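/*
   A minimal sketch of selecting this matrix type explicitly; M, N and the
   preallocation counts are placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, call MatSetFromOptions(A) instead of MatSetType() and run with
   -mat_type mpiaij.
*/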
3790 
3791 EXTERN_C_BEGIN
3792 #undef __FUNCT__
3793 #define __FUNCT__ "MatCreate_MPIAIJ"
3794 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3795 {
3796   Mat_MPIAIJ     *b;
3797   PetscErrorCode ierr;
3798   PetscInt       i;
3799   PetscMPIInt    size;
3800 
3801   PetscFunctionBegin;
3802   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3803 
3804   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3805   B->data         = (void*)b;
3806   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3807   B->factor       = 0;
3808   B->bs           = 1;
3809   B->assembled    = PETSC_FALSE;
3810   B->mapping      = 0;
3811 
3812   B->insertmode      = NOT_SET_VALUES;
3813   b->size            = size;
3814   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3815 
3816   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3817   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3818 
3819   /* the information in the maps duplicates the information computed below, eventually
3820      we should remove the duplicate information that is not contained in the maps */
3821   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3822   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3823 
3824   /* build local table of row and column ownerships */
3825   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3826   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3827   b->cowners = b->rowners + b->size + 2;
3828   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3829   b->rowners[0] = 0;
3830   for (i=2; i<=b->size; i++) {
3831     b->rowners[i] += b->rowners[i-1];
3832   }
3833   b->rstart = b->rowners[b->rank];
3834   b->rend   = b->rowners[b->rank+1];
3835   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3836   b->cowners[0] = 0;
3837   for (i=2; i<=b->size; i++) {
3838     b->cowners[i] += b->cowners[i-1];
3839   }
3840   b->cstart = b->cowners[b->rank];
3841   b->cend   = b->cowners[b->rank+1];
3842 
3843   /* build cache for off array entries formed */
3844   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3845   b->donotstash  = PETSC_FALSE;
3846   b->colmap      = 0;
3847   b->garray      = 0;
3848   b->roworiented = PETSC_TRUE;
3849 
3850   /* stuff used for matrix vector multiply */
3851   b->lvec      = PETSC_NULL;
3852   b->Mvctx     = PETSC_NULL;
3853 
3854   /* stuff for MatGetRow() */
3855   b->rowindices   = 0;
3856   b->rowvalues    = 0;
3857   b->getrowactive = PETSC_FALSE;
3858 
3859   /* Explicitly create 2 MATSEQAIJ matrices. */
3860   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3861   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
3862   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3863   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3864   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3865   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
3866   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3867   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3868 
3869   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3870                                      "MatStoreValues_MPIAIJ",
3871                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3872   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3873                                      "MatRetrieveValues_MPIAIJ",
3874                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3875   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3876 				     "MatGetDiagonalBlock_MPIAIJ",
3877                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3878   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3879 				     "MatIsTranspose_MPIAIJ",
3880 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3881   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3882 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3883 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3884   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3885 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3886 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3887   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3888 				     "MatDiagonalScaleLocal_MPIAIJ",
3889 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3890   PetscFunctionReturn(0);
3891 }
3892 EXTERN_C_END
3893