xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e2df7a95c5ea77c899beea10ff9effd6061e7c8f)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash-table cost; without it, it is not scalable (each processor
11 has an order N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
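/*
  Illustrative sketch (added commentary, not part of the original source): once the
  colmap exists, a global column index gcol (a placeholder name) is translated to a
  local column index lcol in the off-diagonal block B the same way the routines
  below do it.  Indices are stored shifted by one, so a lookup result of 0 before
  the shift, i.e. -1 after it, means the column is not present in B:

  #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
  #else
      lcol = aij->colmap[gcol] - 1;
  #endif
*/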
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; \
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIAIJ"
107 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
110   PetscScalar    value;
111   PetscErrorCode ierr;
112   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
113   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
114   PetscTruth     roworiented = aij->roworiented;
115 
116   /* Some variables required by the macros above */
117   Mat            A = aij->A;
118   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
119   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
120   PetscScalar    *aa = a->a;
121   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
122   Mat            B = aij->B;
123   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
124   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
125   PetscScalar    *ba = b->a;
126 
127   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
128   PetscInt       nonew = a->nonew;
129   PetscScalar    *ap1,*ap2;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (im[i] < 0) continue;
134 #if defined(PETSC_USE_DEBUG)
135     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
136 #endif
137     if (im[i] >= rstart && im[i] < rend) {
138       row      = im[i] - rstart;
139       lastcol1 = -1;
140       rp1      = aj + ai[row];
141       ap1      = aa + ai[row];
142       rmax1    = aimax[row];
143       nrow1    = ailen[row];
144       low1     = 0;
145       high1    = nrow1;
146       lastcol2 = -1;
147       rp2      = bj + bi[row];
148       ap2      = ba + bi[row];
149       rmax2    = bimax[row];
150       nrow2    = bilen[row];
151       low2     = 0;
152       high2    = nrow2;
153 
154       for (j=0; j<n; j++) {
155         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
156         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
157         if (in[j] >= cstart && in[j] < cend){
158           col = in[j] - cstart;
159           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
160         } else if (in[j] < 0) continue;
161 #if defined(PETSC_USE_DEBUG)
162         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
163 #endif
164         else {
165           if (mat->was_assembled) {
166             if (!aij->colmap) {
167               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
168             }
169 #if defined (PETSC_USE_CTABLE)
170             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
171 	    col--;
172 #else
173             col = aij->colmap[in[j]] - 1;
174 #endif
175             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
176               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
177               col =  in[j];
178               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
179               B = aij->B;
180               b = (Mat_SeqAIJ*)B->data;
181               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
182               rp2      = bj + bi[row];
183               ap2      = ba + bi[row];
184               rmax2    = bimax[row];
185               nrow2    = bilen[row];
186               low2     = 0;
187               high2    = nrow2;
188               bm       = aij->B->m;
189               ba = b->a;
190             }
191           } else col = in[j];
192           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
193         }
194       }
195     } else {
196       if (!aij->donotstash) {
197         if (roworiented) {
198           if (ignorezeroentries && v[i*n] == 0.0) continue;
199           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
200         } else {
201           if (ignorezeroentries && v[i] == 0.0) continue;
202           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
203         }
204       }
205     }
206   }
207   PetscFunctionReturn(0);
208 }
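/*
  Usage sketch (added commentary, not part of the original source): callers reach
  MatSetValues_MPIAIJ() through MatSetValues() on a MATMPIAIJ matrix.  A minimal
  assembly loop, assuming a communicator comm and a global size n are defined and
  omitting error checking, might look like:

      Mat         A;
      PetscInt    i,rstart,rend;
      PetscScalar one = 1.0;

      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {
        MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  Entries whose global column lies in [cstart,cend) go into the sequential diagonal
  block aij->A, other locally owned entries go into the off-diagonal block aij->B,
  and rows owned by other processes are stashed until assembly.
*/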
209 
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatGetValues_MPIAIJ"
213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
214 {
215   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
216   PetscErrorCode ierr;
217   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
218   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
219 
220   PetscFunctionBegin;
221   for (i=0; i<m; i++) {
222     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
223     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
224     if (idxm[i] >= rstart && idxm[i] < rend) {
225       row = idxm[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
228         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
229         if (idxn[j] >= cstart && idxn[j] < cend){
230           col = idxn[j] - cstart;
231           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
232         } else {
233           if (!aij->colmap) {
234             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
235           }
236 #if defined (PETSC_USE_CTABLE)
237           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
238           col --;
239 #else
240           col = aij->colmap[idxn[j]] - 1;
241 #endif
242           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
243           else {
244             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
245           }
246         }
247       }
248     } else {
249       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
250     }
251   }
252   PetscFunctionReturn(0);
253 }
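/*
  Usage sketch (added commentary, not part of the original source): MatGetValues()
  on a MATMPIAIJ matrix can only read entries from locally owned rows, as the error
  branch above enforces; the columns may be anywhere in [0,N).  Assuming an
  assembled matrix A, with error checking omitted:

      PetscInt    row,col = 0,rstart,rend;
      PetscScalar val;

      MatGetOwnershipRange(A,&rstart,&rend);
      row = rstart;
      MatGetValues(A,1,&row,1,&col,&val);
*/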
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
260   PetscErrorCode ierr;
261   PetscInt       nstash,reallocs;
262   InsertMode     addv;
263 
264   PetscFunctionBegin;
265   if (aij->donotstash) {
266     PetscFunctionReturn(0);
267   }
268 
269   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
270   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
271   if (addv == (ADD_VALUES|INSERT_VALUES)) {
272     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
273   }
274   mat->insertmode = addv; /* in case this processor had no cache */
275 
276   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
277   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
278   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
288   PetscErrorCode ierr;
289   PetscMPIInt    n;
290   PetscInt       i,j,rstart,ncols,flg;
291   PetscInt       *row,*col,other_disassembled;
292   PetscScalar    *val;
293   InsertMode     addv = mat->insertmode;
294 
295   PetscFunctionBegin;
296   if (!aij->donotstash) {
297     while (1) {
298       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
299       if (!flg) break;
300 
301       for (i=0; i<n;) {
302         /* Now identify the consecutive vals belonging to the same row */
303         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
304         if (j < n) ncols = j-i;
305         else       ncols = n-i;
306         /* Now assemble all these values with a single function call */
307         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
308         i = j;
309       }
310     }
311     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
312   }
313   a->compressedrow.use     = PETSC_FALSE;
314   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
316 
317   /* determine if any processor has disassembled; if so, we must
318      also disassemble ourselves, in order that we may reassemble. */
319   /*
320      if the nonzero structure of the submatrix B cannot change then we know that
321      no processor disassembled, so we can skip this check
322   */
323   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
324     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
325     if (mat->was_assembled && !other_disassembled) {
326       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
327     }
328   }
329   /* re-access b because aij->B may have been changed in MatSetValues() or DisAssemble_MPIAIJ() */
330   b    = (Mat_SeqAIJ *)aij->B->data;
331 
332   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
333     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
334   }
335   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
336   b->compressedrow.use = PETSC_TRUE;
337   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
339 
340   if (aij->rowvalues) {
341     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
342     aij->rowvalues = 0;
343   }
344 
345   /* used by MatAXPY() */
346   a->xtoy = 0; b->xtoy = 0;
347   a->XtoY = 0; b->XtoY = 0;
348 
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
354 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
355 {
356   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
361   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "MatZeroRows_MPIAIJ"
367 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
368 {
369   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
370   PetscErrorCode ierr;
371   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
372   PetscInt       i,*owners = l->rowners;
373   PetscInt       *nprocs,j,idx,nsends,row;
374   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
375   PetscInt       *rvalues,count,base,slen,*source;
376   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
377   MPI_Comm       comm = A->comm;
378   MPI_Request    *send_waits,*recv_waits;
379   MPI_Status     recv_status,*send_status;
380 #if defined(PETSC_DEBUG)
381   PetscTruth     found = PETSC_FALSE;
382 #endif
383 
384   PetscFunctionBegin;
385   /*  first count number of contributors to each processor */
386   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
387   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
388   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
389   j = 0;
390   for (i=0; i<N; i++) {
391     if (lastidx > (idx = rows[i])) j = 0;
392     lastidx = idx;
393     for (; j<size; j++) {
394       if (idx >= owners[j] && idx < owners[j+1]) {
395         nprocs[2*j]++;
396         nprocs[2*j+1] = 1;
397         owner[i] = j;
398 #if defined(PETSC_DEBUG)
399         found = PETSC_TRUE;
400 #endif
401         break;
402       }
403     }
404 #if defined(PETSC_DEBUG)
405     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
406     found = PETSC_FALSE;
407 #endif
408   }
409   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
410 
411   /* inform other processors of number of messages and max length*/
412   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
413 
414   /* post receives:   */
415   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
416   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
417   for (i=0; i<nrecvs; i++) {
418     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
419   }
420 
421   /* do sends:
422       1) starts[i] gives the starting index in svalues for stuff going to
423          the ith processor
424   */
425   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
426   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
427   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
428   starts[0] = 0;
429   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
430   for (i=0; i<N; i++) {
431     svalues[starts[owner[i]]++] = rows[i];
432   }
433 
434   starts[0] = 0;
435   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
436   count = 0;
437   for (i=0; i<size; i++) {
438     if (nprocs[2*i+1]) {
439       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
440     }
441   }
442   ierr = PetscFree(starts);CHKERRQ(ierr);
443 
444   base = owners[rank];
445 
446   /*  wait on receives */
447   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
448   source = lens + nrecvs;
449   count  = nrecvs; slen = 0;
450   while (count) {
451     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
452     /* unpack receives into our local space */
453     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
454     source[imdex]  = recv_status.MPI_SOURCE;
455     lens[imdex]    = n;
456     slen          += n;
457     count--;
458   }
459   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
460 
461   /* move the data into the send scatter */
462   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
463   count = 0;
464   for (i=0; i<nrecvs; i++) {
465     values = rvalues + i*nmax;
466     for (j=0; j<lens[i]; j++) {
467       lrows[count++] = values[j] - base;
468     }
469   }
470   ierr = PetscFree(rvalues);CHKERRQ(ierr);
471   ierr = PetscFree(lens);CHKERRQ(ierr);
472   ierr = PetscFree(owner);CHKERRQ(ierr);
473   ierr = PetscFree(nprocs);CHKERRQ(ierr);
474 
475   /* actually zap the local rows */
476   /*
477         Zero the required rows. If the "diagonal block" of the matrix
478      is square and the user wishes to set the diagonal, we use separate
479      code so that MatSetValues() is not called for each diagonal entry,
480      which would allocate new memory, causing many mallocs and slowing things down.
481 
482        Contributed by: Matthew Knepley
483   */
484   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
485   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
486   if ((diag != 0.0) && (l->A->M == l->A->N)) {
487     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
488   } else if (diag != 0.0) {
489     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
490     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
491       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
492 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
493     }
494     for (i = 0; i < slen; i++) {
495       row  = lrows[i] + rstart;
496       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
497     }
498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
500   } else {
501     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
502   }
503   ierr = PetscFree(lrows);CHKERRQ(ierr);
504 
505   /* wait on sends */
506   if (nsends) {
507     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
508     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
509     ierr = PetscFree(send_status);CHKERRQ(ierr);
510   }
511   ierr = PetscFree(send_waits);CHKERRQ(ierr);
512   ierr = PetscFree(svalues);CHKERRQ(ierr);
513 
514   PetscFunctionReturn(0);
515 }
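/*
  Usage sketch (added commentary, not part of the original source): the rows passed
  to MatZeroRows() are global indices and need not be owned by the calling process;
  the routine above forwards each index to its owner.  For example, zeroing two
  global rows and placing 1.0 on their diagonals (error checking omitted):

      PetscInt    rows[2] = {0,5};
      PetscScalar diag    = 1.0;

      MatZeroRows(A,2,rows,diag);
*/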
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatMult_MPIAIJ"
519 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
520 {
521   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
522   PetscErrorCode ierr;
523   PetscInt       nt;
524 
525   PetscFunctionBegin;
526   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
527   if (nt != A->n) {
528     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
529   }
530   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
531   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
532   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
533   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
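/*
  Usage sketch (added commentary, not part of the original source): the scatter of
  the needed off-process entries of xx into a->lvec is overlapped with the product
  against the diagonal block, and the off-diagonal contribution is added afterwards.
  From the caller's side this is simply

      MatMult(A,x,y);

  where x is distributed compatibly with the columns of A and y with its rows.
*/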
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatMultAdd_MPIAIJ"
539 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
540 {
541   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
542   PetscErrorCode ierr;
543 
544   PetscFunctionBegin;
545   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
546   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
547   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
548   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
554 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
555 {
556   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
557   PetscErrorCode ierr;
558   PetscTruth     merged;
559 
560   PetscFunctionBegin;
561   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
562   /* do nondiagonal part */
563   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
564   if (!merged) {
565     /* send it on its way */
566     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
567     /* do local part */
568     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
569     /* receive remote parts: note this assumes the values are not actually */
570     /* added into yy until the VecScatterEnd() on the next line */
571     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
572   } else {
573     /* do local part */
574     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
575     /* send it on its way */
576     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
577     /* the values were actually received in the Begin(), but we still need to call this no-op End() */
578     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 EXTERN_C_BEGIN
584 #undef __FUNCT__
585 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
586 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
587 {
588   MPI_Comm       comm;
589   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
590   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
591   IS             Me,Notme;
592   PetscErrorCode ierr;
593   PetscInt       M,N,first,last,*notme,i;
594   PetscMPIInt    size;
595 
596   PetscFunctionBegin;
597 
598   /* Easy test: symmetric diagonal block */
599   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
600   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
601   if (!*f) PetscFunctionReturn(0);
602   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
604   if (size == 1) PetscFunctionReturn(0);
605 
606   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
607   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
608   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
609   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
610   for (i=0; i<first; i++) notme[i] = i;
611   for (i=last; i<M; i++) notme[i-last+first] = i;
612   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
613   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
614   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
615   Aoff = Aoffs[0];
616   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
617   Boff = Boffs[0];
618   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
619   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
620   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
621   ierr = ISDestroy(Me);CHKERRQ(ierr);
622   ierr = ISDestroy(Notme);CHKERRQ(ierr);
623 
624   PetscFunctionReturn(0);
625 }
626 EXTERN_C_END
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
630 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631 {
632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
633   PetscErrorCode ierr;
634 
635   PetscFunctionBegin;
636   /* do nondiagonal part */
637   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
638   /* send it on its way */
639   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
640   /* do local part */
641   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
642   /* receive remote parts */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   PetscErrorCode ierr;
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
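/*
  Usage sketch (added commentary, not part of the original source): as the checks
  above require, MatGetDiagonal() on a MATMPIAIJ matrix assumes a square matrix
  whose row and column ownership ranges coincide, so the global diagonal is exactly
  the diagonal of the local block aij->A.  Assuming a vector d whose parallel
  layout matches the rows of A:

      MatGetDiagonal(A,d);
*/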
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
670 {
671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
672   PetscErrorCode ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
676   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
685   PetscErrorCode ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705 
706   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
707   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
708   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
709   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
710   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
711   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
712   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_Binary"
718 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
722   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
723   PetscErrorCode    ierr;
724   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
725   int               fd;
726   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
727   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
728   PetscScalar       *column_values;
729 
730   PetscFunctionBegin;
731   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
732   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
733   nz   = A->nz + B->nz;
734   if (!rank) {
735     header[0] = MAT_FILE_COOKIE;
736     header[1] = mat->M;
737     header[2] = mat->N;
738     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
739     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
740     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
741     /* get largest number of rows any processor has */
742     rlen = mat->m;
743     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
744     for (i=1; i<size; i++) {
745       rlen = PetscMax(rlen,range[i+1] - range[i]);
746     }
747   } else {
748     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
749     rlen = mat->m;
750   }
751 
752   /* load up the local row counts */
753   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
754   for (i=0; i<mat->m; i++) {
755     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
756   }
757 
758   /* store the row lengths to the file */
759   if (!rank) {
760     MPI_Status status;
761     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
762     for (i=1; i<size; i++) {
763       rlen = range[i+1] - range[i];
764       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
765       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
766     }
767   } else {
768     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
769   }
770   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
771 
772   /* load up the local column indices */
773   nzmax = nz; /* )th processor needs space a largest processor needs */
774   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
775   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
776   cnt  = 0;
777   for (i=0; i<mat->m; i++) {
778     for (j=B->i[i]; j<B->i[i+1]; j++) {
779       if ( (col = garray[B->j[j]]) > cstart) break;
780       column_indices[cnt++] = col;
781     }
782     for (k=A->i[i]; k<A->i[i+1]; k++) {
783       column_indices[cnt++] = A->j[k] + cstart;
784     }
785     for (; j<B->i[i+1]; j++) {
786       column_indices[cnt++] = garray[B->j[j]];
787     }
788   }
789   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
790 
791   /* store the column indices to the file */
792   if (!rank) {
793     MPI_Status status;
794     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
797       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
798       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
799       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
800     }
801   } else {
802     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
803     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
804   }
805   ierr = PetscFree(column_indices);CHKERRQ(ierr);
806 
807   /* load up the local column values */
808   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
809   cnt  = 0;
810   for (i=0; i<mat->m; i++) {
811     for (j=B->i[i]; j<B->i[i+1]; j++) {
812       if ( garray[B->j[j]] > cstart) break;
813       column_values[cnt++] = B->a[j];
814     }
815     for (k=A->i[i]; k<A->i[i+1]; k++) {
816       column_values[cnt++] = A->a[k];
817     }
818     for (; j<B->i[i+1]; j++) {
819       column_values[cnt++] = B->a[j];
820     }
821   }
822   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
823 
824   /* store the column values to the file */
825   if (!rank) {
826     MPI_Status status;
827     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
828     for (i=1; i<size; i++) {
829       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
830       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
831       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
833     }
834   } else {
835     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
836     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
837   }
838   ierr = PetscFree(column_values);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
844 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
845 {
846   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
847   PetscErrorCode    ierr;
848   PetscMPIInt       rank = aij->rank,size = aij->size;
849   PetscTruth        isdraw,iascii,isbinary;
850   PetscViewer       sviewer;
851   PetscViewerFormat format;
852 
853   PetscFunctionBegin;
854   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
855   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
856   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
857   if (iascii) {
858     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
859     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
860       MatInfo    info;
861       PetscTruth inodes;
862 
863       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
864       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
865       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
866       if (!inodes) {
867         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
868 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
869       } else {
870         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
871 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
872       }
873       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
875       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
876       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
877       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
878       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     } else if (format == PETSC_VIEWER_ASCII_INFO) {
881       PetscInt   inodecount,inodelimit,*inodes;
882       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
883       if (inodes) {
884         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
885       } else {
886         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
887       }
888       PetscFunctionReturn(0);
889     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
890       PetscFunctionReturn(0);
891     }
892   } else if (isbinary) {
893     if (size == 1) {
894       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
895       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
896     } else {
897       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
898     }
899     PetscFunctionReturn(0);
900   } else if (isdraw) {
901     PetscDraw  draw;
902     PetscTruth isnull;
903     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
904     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
905   }
906 
907   if (size == 1) {
908     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
909     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
910   } else {
911     /* assemble the entire matrix onto first processor. */
912     Mat         A;
913     Mat_SeqAIJ  *Aloc;
914     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
915     PetscScalar *a;
916 
917     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
918     if (!rank) {
919       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
920     } else {
921       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
922     }
923     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
924     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
925     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
926     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
927 
928     /* copy over the A part */
929     Aloc = (Mat_SeqAIJ*)aij->A->data;
930     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
931     row = aij->rstart;
932     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
933     for (i=0; i<m; i++) {
934       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
935       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
936     }
937     aj = Aloc->j;
938     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
939 
940     /* copy over the B part */
941     Aloc = (Mat_SeqAIJ*)aij->B->data;
942     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943     row  = aij->rstart;
944     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
945     ct   = cols;
946     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
947     for (i=0; i<m; i++) {
948       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
949       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
950     }
951     ierr = PetscFree(ct);CHKERRQ(ierr);
952     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
954     /*
955        Everyone has to make this call to draw the matrix since the graphics waits are
956        synchronized across all processors that share the PetscDraw object
957     */
958     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
959     if (!rank) {
960       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
961       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
962     }
963     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
964     ierr = MatDestroy(A);CHKERRQ(ierr);
965   }
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatView_MPIAIJ"
971 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
972 {
973   PetscErrorCode ierr;
974   PetscTruth     iascii,isdraw,issocket,isbinary;
975 
976   PetscFunctionBegin;
977   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
978   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
979   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
980   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
981   if (iascii || isdraw || isbinary || issocket) {
982     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
983   } else {
984     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
985   }
986   PetscFunctionReturn(0);
987 }
988 
989 
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatRelax_MPIAIJ"
993 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
994 {
995   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
996   PetscErrorCode ierr;
997   Vec            bb1;
998 
999   PetscFunctionBegin;
1000   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1001 
1002   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1003 
1004   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1005     if (flag & SOR_ZERO_INITIAL_GUESS) {
1006       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1007       its--;
1008     }
1009 
1010     while (its--) {
1011       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1012       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1013 
1014       /* update rhs: bb1 = bb - B*x */
1015       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1016       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1017 
1018       /* local sweep */
1019       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1020       CHKERRQ(ierr);
1021     }
1022   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1023     if (flag & SOR_ZERO_INITIAL_GUESS) {
1024       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1025       its--;
1026     }
1027     while (its--) {
1028       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1029       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1030 
1031       /* update rhs: bb1 = bb - B*x */
1032       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1033       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1034 
1035       /* local sweep */
1036       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1037       CHKERRQ(ierr);
1038     }
1039   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1040     if (flag & SOR_ZERO_INITIAL_GUESS) {
1041       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1042       its--;
1043     }
1044     while (its--) {
1045       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1046       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1047 
1048       /* update rhs: bb1 = bb - B*x */
1049       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1050       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1051 
1052       /* local sweep */
1053       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1054       CHKERRQ(ierr);
1055     }
1056   } else {
1057     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1058   }
1059 
1060   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatPermute_MPIAIJ"
1066 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1067 {
1068   MPI_Comm       comm,pcomm;
1069   PetscInt       first,local_size,nrows,*rows;
1070   int            ntids;
1071   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1072   PetscErrorCode ierr;
1073 
1074   PetscFunctionBegin;
1075   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1076   /* make a collective version of 'rowp' */
1077   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1078   if (pcomm==comm) {
1079     crowp = rowp;
1080   } else {
1081     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1082     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1083     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1084     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1085   }
1086   /* collect the global row permutation and invert it */
1087   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1088   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1089   if (pcomm!=comm) {
1090     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1091   }
1092   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1093   /* get the local target indices */
1094   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1095   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1096   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1097   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1098   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1099   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1100   /* the column permutation is so much easier;
1101      make a local version of 'colp' and invert it */
1102   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1103   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1104   if (ntids==1) {
1105     lcolp = colp;
1106   } else {
1107     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1108     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1109     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1110   }
1111   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1112   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1113   if (ntids>1) {
1114     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1115     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1116   }
1117   /* now we just get the submatrix */
1118   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1119   /* clean up */
1120   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1121   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1127 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1128 {
1129   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1130   Mat            A = mat->A,B = mat->B;
1131   PetscErrorCode ierr;
1132   PetscReal      isend[5],irecv[5];
1133 
1134   PetscFunctionBegin;
1135   info->block_size     = 1.0;
1136   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1137   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1138   isend[3] = info->memory;  isend[4] = info->mallocs;
1139   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1140   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1141   isend[3] += info->memory;  isend[4] += info->mallocs;
1142   if (flag == MAT_LOCAL) {
1143     info->nz_used      = isend[0];
1144     info->nz_allocated = isend[1];
1145     info->nz_unneeded  = isend[2];
1146     info->memory       = isend[3];
1147     info->mallocs      = isend[4];
1148   } else if (flag == MAT_GLOBAL_MAX) {
1149     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1150     info->nz_used      = irecv[0];
1151     info->nz_allocated = irecv[1];
1152     info->nz_unneeded  = irecv[2];
1153     info->memory       = irecv[3];
1154     info->mallocs      = irecv[4];
1155   } else if (flag == MAT_GLOBAL_SUM) {
1156     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1157     info->nz_used      = irecv[0];
1158     info->nz_allocated = irecv[1];
1159     info->nz_unneeded  = irecv[2];
1160     info->memory       = irecv[3];
1161     info->mallocs      = irecv[4];
1162   }
1163   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1164   info->fill_ratio_needed = 0;
1165   info->factor_mallocs    = 0;
1166   info->rows_global       = (double)matin->M;
1167   info->columns_global    = (double)matin->N;
1168   info->rows_local        = (double)matin->m;
1169   info->columns_local     = (double)matin->N;
1170 
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatSetOption_MPIAIJ"
1176 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1177 {
1178   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1179   PetscErrorCode ierr;
1180 
1181   PetscFunctionBegin;
1182   switch (op) {
1183   case MAT_NO_NEW_NONZERO_LOCATIONS:
1184   case MAT_YES_NEW_NONZERO_LOCATIONS:
1185   case MAT_COLUMNS_UNSORTED:
1186   case MAT_COLUMNS_SORTED:
1187   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1188   case MAT_KEEP_ZEROED_ROWS:
1189   case MAT_NEW_NONZERO_LOCATION_ERR:
1190   case MAT_USE_INODES:
1191   case MAT_DO_NOT_USE_INODES:
1192   case MAT_IGNORE_ZERO_ENTRIES:
1193     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1194     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1195     break;
1196   case MAT_ROW_ORIENTED:
1197     a->roworiented = PETSC_TRUE;
1198     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1199     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1200     break;
1201   case MAT_ROWS_SORTED:
1202   case MAT_ROWS_UNSORTED:
1203   case MAT_YES_NEW_DIAGONALS:
1204     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1205     break;
1206   case MAT_COLUMN_ORIENTED:
1207     a->roworiented = PETSC_FALSE;
1208     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1210     break;
1211   case MAT_IGNORE_OFF_PROC_ENTRIES:
1212     a->donotstash = PETSC_TRUE;
1213     break;
1214   case MAT_NO_NEW_DIAGONALS:
1215     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1216   case MAT_SYMMETRIC:
1217   case MAT_STRUCTURALLY_SYMMETRIC:
1218   case MAT_HERMITIAN:
1219   case MAT_SYMMETRY_ETERNAL:
1220     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1221     break;
1222   case MAT_NOT_SYMMETRIC:
1223   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1224   case MAT_NOT_HERMITIAN:
1225   case MAT_NOT_SYMMETRY_ETERNAL:
1226     break;
1227   default:
1228     SETERRQ(PETSC_ERR_SUP,"unknown option");
1229   }
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatGetRow_MPIAIJ"
1235 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1236 {
1237   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1238   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1239   PetscErrorCode ierr;
1240   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1241   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1242   PetscInt       *cmap,*idx_p;
1243 
1244   PetscFunctionBegin;
1245   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1246   mat->getrowactive = PETSC_TRUE;
1247 
1248   if (!mat->rowvalues && (idx || v)) {
1249     /*
1250         allocate enough space to hold information from the longest row.
1251     */
1252     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1253     PetscInt     max = 1,tmp;
1254     for (i=0; i<matin->m; i++) {
1255       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1256       if (max < tmp) { max = tmp; }
1257     }
1258     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1259     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1260   }
1261 
1262   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1263   lrow = row - rstart;
1264 
1265   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1266   if (!v)   {pvA = 0; pvB = 0;}
1267   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1268   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1269   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1270   nztot = nzA + nzB;
1271 
1272   cmap  = mat->garray;
1273   if (v  || idx) {
1274     if (nztot) {
1275       /* Sort by increasing column numbers, assuming A and B already sorted */
1276       PetscInt imark = -1;
1277       if (v) {
1278         *v = v_p = mat->rowvalues;
1279         for (i=0; i<nzB; i++) {
1280           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1281           else break;
1282         }
1283         imark = i;
1284         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1285         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1286       }
1287       if (idx) {
1288         *idx = idx_p = mat->rowindices;
1289         if (imark > -1) {
1290           for (i=0; i<imark; i++) {
1291             idx_p[i] = cmap[cworkB[i]];
1292           }
1293         } else {
1294           for (i=0; i<nzB; i++) {
1295             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1296             else break;
1297           }
1298           imark = i;
1299         }
1300         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1301         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1302       }
1303     } else {
1304       if (idx) *idx = 0;
1305       if (v)   *v   = 0;
1306     }
1307   }
1308   *nz = nztot;
1309   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1310   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1316 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1317 {
1318   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1319 
1320   PetscFunctionBegin;
1321   if (!aij->getrowactive) {
1322     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1323   }
1324   aij->getrowactive = PETSC_FALSE;
1325   PetscFunctionReturn(0);
1326 }
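/*
  Usage sketch (added commentary, not part of the original source): MatGetRow() on a
  MATMPIAIJ matrix merges the diagonal and off-diagonal blocks into one row sorted by
  global column, and each call must be matched by MatRestoreRow().  Only locally
  owned rows may be requested (error checking omitted):

      PetscInt    row,ncols,rstart,rend,*cols;
      PetscScalar *vals;

      MatGetOwnershipRange(A,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        MatGetRow(A,row,&ncols,&cols,&vals);
        ... use cols[0..ncols-1] and vals[0..ncols-1] ...
        MatRestoreRow(A,row,&ncols,&cols,&vals);
      }
*/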
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatNorm_MPIAIJ"
1330 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1331 {
1332   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1333   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1334   PetscErrorCode ierr;
1335   PetscInt       i,j,cstart = aij->cstart;
1336   PetscReal      sum = 0.0;
1337   PetscScalar    *v;
1338 
1339   PetscFunctionBegin;
1340   if (aij->size == 1) {
1341     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1342   } else {
1343     if (type == NORM_FROBENIUS) {
1344       v = amat->a;
1345       for (i=0; i<amat->nz; i++) {
1346 #if defined(PETSC_USE_COMPLEX)
1347         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1348 #else
1349         sum += (*v)*(*v); v++;
1350 #endif
1351       }
1352       v = bmat->a;
1353       for (i=0; i<bmat->nz; i++) {
1354 #if defined(PETSC_USE_COMPLEX)
1355         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1356 #else
1357         sum += (*v)*(*v); v++;
1358 #endif
1359       }
1360       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1361       *norm = sqrt(*norm);
1362     } else if (type == NORM_1) { /* max column norm */
1363       PetscReal *tmp,*tmp2;
1364       PetscInt    *jj,*garray = aij->garray;
1365       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1366       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1367       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1368       *norm = 0.0;
1369       v = amat->a; jj = amat->j;
1370       for (j=0; j<amat->nz; j++) {
1371         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1372       }
1373       v = bmat->a; jj = bmat->j;
1374       for (j=0; j<bmat->nz; j++) {
1375         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1376       }
1377       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1378       for (j=0; j<mat->N; j++) {
1379         if (tmp2[j] > *norm) *norm = tmp2[j];
1380       }
1381       ierr = PetscFree(tmp);CHKERRQ(ierr);
1382       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1383     } else if (type == NORM_INFINITY) { /* max row norm */
1384       PetscReal ntemp = 0.0;
1385       for (j=0; j<aij->A->m; j++) {
1386         v = amat->a + amat->i[j];
1387         sum = 0.0;
1388         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1389           sum += PetscAbsScalar(*v); v++;
1390         }
1391         v = bmat->a + bmat->i[j];
1392         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1393           sum += PetscAbsScalar(*v); v++;
1394         }
1395         if (sum > ntemp) ntemp = sum;
1396       }
1397       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1398     } else {
1399       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1400     }
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatTranspose_MPIAIJ"
1407 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1408 {
1409   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1410   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1411   PetscErrorCode ierr;
1412   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1413   Mat            B;
1414   PetscScalar    *array;
1415 
1416   PetscFunctionBegin;
1417   if (!matout && M != N) {
1418     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1419   }
1420 
1421   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1422   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1423   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1424   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1425 
1426   /* copy over the A part */
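  /* each local row of A is inserted as a column of B, i.e. the row and column roles are swapped in the MatSetValues() call below */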
1427   Aloc = (Mat_SeqAIJ*)a->A->data;
1428   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1429   row = a->rstart;
1430   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1431   for (i=0; i<m; i++) {
1432     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1433     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1434   }
1435   aj = Aloc->j;
1436   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1437 
1438   /* copy over the B part */
1439   Aloc = (Mat_SeqAIJ*)a->B->data;
1440   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1441   row  = a->rstart;
1442   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1443   ct   = cols;
1444   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1445   for (i=0; i<m; i++) {
1446     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1447     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1448   }
1449   ierr = PetscFree(ct);CHKERRQ(ierr);
1450   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1451   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1452   if (matout) {
1453     *matout = B;
1454   } else {
1455     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1456   }
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1462 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1463 {
1464   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1465   Mat            a = aij->A,b = aij->B;
1466   PetscErrorCode ierr;
1467   PetscInt       s1,s2,s3;
1468 
1469   PetscFunctionBegin;
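  /* computes mat <- diag(ll) * mat * diag(rr); rr must first be scattered into the ghosted layout (lvec) before the off-diagonal block can be right-scaled */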
1470   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1471   if (rr) {
1472     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1473     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1474     /* Overlap communication with computation. */
1475     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1476   }
1477   if (ll) {
1478     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1479     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1480     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1481   }
1482   /* scale  the diagonal block */
1483   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1484 
1485   if (rr) {
1486     /* Do a scatter end and then right scale the off-diagonal block */
1487     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1488     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1489   }
1490 
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1497 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1498 {
1499   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1500   PetscErrorCode ierr;
1501 
1502   PetscFunctionBegin;
1503   if (!a->rank) {
1504     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1505   }
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1511 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1512 {
1513   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1514   PetscErrorCode ierr;
1515 
1516   PetscFunctionBegin;
1517   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1518   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1523 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1524 {
1525   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatEqual_MPIAIJ"
1535 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1536 {
1537   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1538   Mat            a,b,c,d;
1539   PetscTruth     flg;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   a = matA->A; b = matA->B;
1544   c = matB->A; d = matB->B;
1545 
1546   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1547   if (flg) {
1548     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1549   }
1550   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatCopy_MPIAIJ"
1556 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1557 {
1558   PetscErrorCode ierr;
1559   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1560   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1561 
1562   PetscFunctionBegin;
1563   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1564   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1565     /* because of the column compression in the off-processor part of the matrix a->B,
1566        the number of columns in a->B and b->B may differ, hence we cannot call
1567        MatCopy() directly on the two parts. If need be, a more efficient copy than
1568        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1569        then copying the submatrices */
1570     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1571   } else {
1572     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1573     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1580 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1581 {
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #include "petscblaslapack.h"
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatAXPY_MPIAIJ"
1592 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1593 {
1594   PetscErrorCode ierr;
1595   PetscInt       i;
1596   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1597   PetscBLASInt   bnz,one=1;
1598   Mat_SeqAIJ     *x,*y;
1599 
1600   PetscFunctionBegin;
1601   if (str == SAME_NONZERO_PATTERN) {
1602     PetscScalar alpha = a;
1603     x = (Mat_SeqAIJ *)xx->A->data;
1604     y = (Mat_SeqAIJ *)yy->A->data;
1605     bnz = (PetscBLASInt)x->nz;
1606     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1607     x = (Mat_SeqAIJ *)xx->B->data;
1608     y = (Mat_SeqAIJ *)yy->B->data;
1609     bnz = (PetscBLASInt)x->nz;
1610     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1611   } else if (str == SUBSET_NONZERO_PATTERN) {
1612     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1613 
1614     x = (Mat_SeqAIJ *)xx->B->data;
1615     y = (Mat_SeqAIJ *)yy->B->data;
1616     if (y->xtoy && y->XtoY != xx->B) {
1617       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1618       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1619     }
1620     if (!y->xtoy) { /* get xtoy */
1621       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1622       y->XtoY = xx->B;
1623     }
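    /* y->xtoy[i] is the position in y's value array of the entry matching x->a[i] */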
1624     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1625   } else {
1626     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1627   }
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatConjugate_MPIAIJ"
1635 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1636 {
1637 #if defined(PETSC_USE_COMPLEX)
1638   PetscErrorCode ierr;
1639   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1640 
1641   PetscFunctionBegin;
1642   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1643   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1644 #else
1645   PetscFunctionBegin;
1646 #endif
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 /* -------------------------------------------------------------------*/
1651 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1652        MatGetRow_MPIAIJ,
1653        MatRestoreRow_MPIAIJ,
1654        MatMult_MPIAIJ,
1655 /* 4*/ MatMultAdd_MPIAIJ,
1656        MatMultTranspose_MPIAIJ,
1657        MatMultTransposeAdd_MPIAIJ,
1658        0,
1659        0,
1660        0,
1661 /*10*/ 0,
1662        0,
1663        0,
1664        MatRelax_MPIAIJ,
1665        MatTranspose_MPIAIJ,
1666 /*15*/ MatGetInfo_MPIAIJ,
1667        MatEqual_MPIAIJ,
1668        MatGetDiagonal_MPIAIJ,
1669        MatDiagonalScale_MPIAIJ,
1670        MatNorm_MPIAIJ,
1671 /*20*/ MatAssemblyBegin_MPIAIJ,
1672        MatAssemblyEnd_MPIAIJ,
1673        0,
1674        MatSetOption_MPIAIJ,
1675        MatZeroEntries_MPIAIJ,
1676 /*25*/ MatZeroRows_MPIAIJ,
1677        0,
1678        0,
1679        0,
1680        0,
1681 /*30*/ MatSetUpPreallocation_MPIAIJ,
1682        0,
1683        0,
1684        0,
1685        0,
1686 /*35*/ MatDuplicate_MPIAIJ,
1687        0,
1688        0,
1689        0,
1690        0,
1691 /*40*/ MatAXPY_MPIAIJ,
1692        MatGetSubMatrices_MPIAIJ,
1693        MatIncreaseOverlap_MPIAIJ,
1694        MatGetValues_MPIAIJ,
1695        MatCopy_MPIAIJ,
1696 /*45*/ MatPrintHelp_MPIAIJ,
1697        MatScale_MPIAIJ,
1698        0,
1699        0,
1700        0,
1701 /*50*/ MatSetBlockSize_MPIAIJ,
1702        0,
1703        0,
1704        0,
1705        0,
1706 /*55*/ MatFDColoringCreate_MPIAIJ,
1707        0,
1708        MatSetUnfactored_MPIAIJ,
1709        MatPermute_MPIAIJ,
1710        0,
1711 /*60*/ MatGetSubMatrix_MPIAIJ,
1712        MatDestroy_MPIAIJ,
1713        MatView_MPIAIJ,
1714        MatGetPetscMaps_Petsc,
1715        0,
1716 /*65*/ 0,
1717        0,
1718        0,
1719        0,
1720        0,
1721 /*70*/ 0,
1722        0,
1723        MatSetColoring_MPIAIJ,
1724 #if defined(PETSC_HAVE_ADIC)
1725        MatSetValuesAdic_MPIAIJ,
1726 #else
1727        0,
1728 #endif
1729        MatSetValuesAdifor_MPIAIJ,
1730 /*75*/ 0,
1731        0,
1732        0,
1733        0,
1734        0,
1735 /*80*/ 0,
1736        0,
1737        0,
1738        0,
1739 /*84*/ MatLoad_MPIAIJ,
1740        0,
1741        0,
1742        0,
1743        0,
1744        0,
1745 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1746        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1747        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1748        MatPtAP_Basic,
1749        MatPtAPSymbolic_MPIAIJ,
1750 /*95*/ MatPtAPNumeric_MPIAIJ,
1751        0,
1752        0,
1753        0,
1754        0,
1755 /*100*/0,
1756        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1757        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1758        MatConjugate_MPIAIJ
1759 };
1760 
1761 /* ----------------------------------------------------------------------------------------*/
1762 
1763 EXTERN_C_BEGIN
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1766 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1767 {
1768   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1773   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1774   PetscFunctionReturn(0);
1775 }
1776 EXTERN_C_END
1777 
1778 EXTERN_C_BEGIN
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1781 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1782 {
1783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1784   PetscErrorCode ierr;
1785 
1786   PetscFunctionBegin;
1787   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1788   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1789   PetscFunctionReturn(0);
1790 }
1791 EXTERN_C_END
1792 
1793 #include "petscpc.h"
1794 EXTERN_C_BEGIN
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1797 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1798 {
1799   Mat_MPIAIJ     *b;
1800   PetscErrorCode ierr;
1801   PetscInt       i;
1802 
1803   PetscFunctionBegin;
1804   B->preallocated = PETSC_TRUE;
1805   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1806   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1807   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1808   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1809   if (d_nnz) {
1810     for (i=0; i<B->m; i++) {
1811       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1812     }
1813   }
1814   if (o_nnz) {
1815     for (i=0; i<B->m; i++) {
1816       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1817     }
1818   }
1819   b = (Mat_MPIAIJ*)B->data;
1820   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1821   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1822 
1823   PetscFunctionReturn(0);
1824 }
1825 EXTERN_C_END
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1829 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1830 {
1831   Mat            mat;
1832   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1833   PetscErrorCode ierr;
1834 
1835   PetscFunctionBegin;
1836   *newmat       = 0;
1837   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1838   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1839   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1840   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1841   a    = (Mat_MPIAIJ*)mat->data;
1842 
1843   mat->factor       = matin->factor;
1844   mat->bs           = matin->bs;
1845   mat->assembled    = PETSC_TRUE;
1846   mat->insertmode   = NOT_SET_VALUES;
1847   mat->preallocated = PETSC_TRUE;
1848 
1849   a->rstart       = oldmat->rstart;
1850   a->rend         = oldmat->rend;
1851   a->cstart       = oldmat->cstart;
1852   a->cend         = oldmat->cend;
1853   a->size         = oldmat->size;
1854   a->rank         = oldmat->rank;
1855   a->donotstash   = oldmat->donotstash;
1856   a->roworiented  = oldmat->roworiented;
1857   a->rowindices   = 0;
1858   a->rowvalues    = 0;
1859   a->getrowactive = PETSC_FALSE;
1860 
1861   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1862   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1863   if (oldmat->colmap) {
1864 #if defined (PETSC_USE_CTABLE)
1865     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1866 #else
1867     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1868     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1869     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1870 #endif
1871   } else a->colmap = 0;
1872   if (oldmat->garray) {
1873     PetscInt len;
1874     len  = oldmat->B->n;
1875     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1876     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1877     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1878   } else a->garray = 0;
1879 
1880   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1881   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1882   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1883   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1884   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1885   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1886   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1887   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1888   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1889   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1890   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1891   *newmat = mat;
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 #include "petscsys.h"
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatLoad_MPIAIJ"
1899 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1900 {
1901   Mat            A;
1902   PetscScalar    *vals,*svals;
1903   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1904   MPI_Status     status;
1905   PetscErrorCode ierr;
1906   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1907   PetscInt       i,nz,j,rstart,rend,mmax;
1908   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1909   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1910   PetscInt       cend,cstart,n,*rowners;
1911   int            fd;
1912 
1913   PetscFunctionBegin;
1914   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1915   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1916   if (!rank) {
1917     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1918     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1919     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1920   }
1921 
1922   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1923   M = header[1]; N = header[2];
1924   /* determine ownership of all rows */
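  /* each process gets M/size rows; the first M%size processes take one extra row */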
1925   m    = M/size + ((M % size) > rank);
1926   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1927   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1928 
1929   /* The first process needs enough room for the process with the most rows */
1930   if (!rank) {
1931     mmax       = rowners[1];
1932     for (i=2; i<size; i++) {
1933       mmax = PetscMax(mmax,rowners[i]);
1934     }
1935   } else mmax = m;
1936 
1937   rowners[0] = 0;
1938   for (i=2; i<=size; i++) {
1939     mmax       = PetscMax(mmax,rowners[i]);
1940     rowners[i] += rowners[i-1];
1941   }
1942   rstart = rowners[rank];
1943   rend   = rowners[rank+1];
1944 
1945   /* distribute row lengths to all processors */
1946   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1947   if (!rank) {
1948     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1949     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1950     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1951     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1952     for (j=0; j<m; j++) {
1953       procsnz[0] += ourlens[j];
1954     }
1955     for (i=1; i<size; i++) {
1956       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1957       /* calculate the number of nonzeros on each processor */
1958       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1959         procsnz[i] += rowlengths[j];
1960       }
1961       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1962     }
1963     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1964   } else {
1965     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1966   }
1967 
1968   if (!rank) {
1969     /* determine max buffer needed and allocate it */
1970     maxnz = 0;
1971     for (i=0; i<size; i++) {
1972       maxnz = PetscMax(maxnz,procsnz[i]);
1973     }
1974     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1975 
1976     /* read in my part of the matrix column indices  */
1977     nz   = procsnz[0];
1978     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1979     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1980 
1981     /* read in everyone else's and ship them off */
1982     for (i=1; i<size; i++) {
1983       nz   = procsnz[i];
1984       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1985       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1986     }
1987     ierr = PetscFree(cols);CHKERRQ(ierr);
1988   } else {
1989     /* determine buffer space needed for message */
1990     nz = 0;
1991     for (i=0; i<m; i++) {
1992       nz += ourlens[i];
1993     }
1994     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1995 
1996     /* receive message of column indices */
1997     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1998     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1999     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2000   }
2001 
2002   /* determine column ownership if matrix is not square */
2003   if (N != M) {
2004     n      = N/size + ((N % size) > rank);
2005     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2006     cstart = cend - n;
2007   } else {
2008     cstart = rstart;
2009     cend   = rend;
2010     n      = cend - cstart;
2011   }
2012 
2013   /* loop over local rows, determining the number of off-diagonal entries */
2014   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2015   jj = 0;
2016   for (i=0; i<m; i++) {
2017     for (j=0; j<ourlens[i]; j++) {
2018       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2019       jj++;
2020     }
2021   }
2022 
2023   /* create our matrix */
2024   for (i=0; i<m; i++) {
2025     ourlens[i] -= offlens[i];
2026   }
2027   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2028   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2029   ierr = MatSetType(A,type);CHKERRQ(ierr);
2030   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2031 
2032   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2033   for (i=0; i<m; i++) {
2034     ourlens[i] += offlens[i];
2035   }
2036 
2037   if (!rank) {
2038     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2039 
2040     /* read in my part of the matrix numerical values  */
2041     nz   = procsnz[0];
2042     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2043 
2044     /* insert into matrix */
2045     jj      = rstart;
2046     smycols = mycols;
2047     svals   = vals;
2048     for (i=0; i<m; i++) {
2049       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2050       smycols += ourlens[i];
2051       svals   += ourlens[i];
2052       jj++;
2053     }
2054 
2055     /* read in the other processes' parts and ship them out */
2056     for (i=1; i<size; i++) {
2057       nz   = procsnz[i];
2058       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2059       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2060     }
2061     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2062   } else {
2063     /* receive numeric values */
2064     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2065 
2066     /* receive message of values */
2067     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2068     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2069     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2070 
2071     /* insert into matrix */
2072     jj      = rstart;
2073     smycols = mycols;
2074     svals   = vals;
2075     for (i=0; i<m; i++) {
2076       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2077       smycols += ourlens[i];
2078       svals   += ourlens[i];
2079       jj++;
2080     }
2081   }
2082   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2083   ierr = PetscFree(vals);CHKERRQ(ierr);
2084   ierr = PetscFree(mycols);CHKERRQ(ierr);
2085   ierr = PetscFree(rowners);CHKERRQ(ierr);
2086 
2087   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089   *newmat = A;
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #undef __FUNCT__
2094 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2095 /*
2096     Not great since it makes two copies of the submatrix: first a local SeqAIJ, and
2097   then the end result obtained by concatenating the local matrices.
2098   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2099 */
2100 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2101 {
2102   PetscErrorCode ierr;
2103   PetscMPIInt    rank,size;
2104   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2105   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2106   Mat            *local,M,Mreuse;
2107   PetscScalar    *vwork,*aa;
2108   MPI_Comm       comm = mat->comm;
2109   Mat_SeqAIJ     *aij;
2110 
2111 
2112   PetscFunctionBegin;
2113   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2114   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2115 
2116   if (call ==  MAT_REUSE_MATRIX) {
2117     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2118     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2119     local = &Mreuse;
2120     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2121   } else {
2122     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2123     Mreuse = *local;
2124     ierr   = PetscFree(local);CHKERRQ(ierr);
2125   }
2126 
2127   /*
2128       m - number of local rows
2129       n - number of columns (same on all processors)
2130       rstart - first row in new global matrix generated
2131   */
2132   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2133   if (call == MAT_INITIAL_MATRIX) {
2134     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2135     ii  = aij->i;
2136     jj  = aij->j;
2137 
2138     /*
2139         Determine the number of non-zeros in the diagonal and off-diagonal
2140         portions of the matrix in order to do correct preallocation
2141     */
2142 
2143     /* first get start and end of "diagonal" columns */
2144     if (csize == PETSC_DECIDE) {
2145       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2146       if (mglobal == n) { /* square matrix */
2147         nlocal = m;
2148       } else {
2149         nlocal = n/size + ((n % size) > rank);
2150       }
2151     } else {
2152       nlocal = csize;
2153     }
2154     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2155     rstart = rend - nlocal;
2156     if (rank == size - 1 && rend != n) {
2157       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2158     }
2159 
2160     /* next, compute all the lengths */
2161     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2162     olens = dlens + m;
2163     for (i=0; i<m; i++) {
2164       jend = ii[i+1] - ii[i];
2165       olen = 0;
2166       dlen = 0;
2167       for (j=0; j<jend; j++) {
2168         if (*jj < rstart || *jj >= rend) olen++;
2169         else dlen++;
2170         jj++;
2171       }
2172       olens[i] = olen;
2173       dlens[i] = dlen;
2174     }
2175     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2176     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2177     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2178     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2179     ierr = PetscFree(dlens);CHKERRQ(ierr);
2180   } else {
2181     PetscInt ml,nl;
2182 
2183     M = *newmat;
2184     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2185     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2186     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2187     /*
2188          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2189        rather than the slower MatSetValues().
2190     */
2191     M->was_assembled = PETSC_TRUE;
2192     M->assembled     = PETSC_FALSE;
2193   }
2194   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2195   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2196   ii  = aij->i;
2197   jj  = aij->j;
2198   aa  = aij->a;
2199   for (i=0; i<m; i++) {
2200     row   = rstart + i;
2201     nz    = ii[i+1] - ii[i];
2202     cwork = jj;     jj += nz;
2203     vwork = aa;     aa += nz;
2204     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2205   }
2206 
2207   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2208   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2209   *newmat = M;
2210 
2211   /* save submatrix used in processor for next request */
2212   if (call ==  MAT_INITIAL_MATRIX) {
2213     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2214     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2215   }
2216 
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 EXTERN_C_BEGIN
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2223 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2224 {
2225   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2226   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2227   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2228   const PetscInt *JJ;
2229   PetscScalar    *values;
2230   PetscErrorCode ierr;
2231 
2232   PetscFunctionBegin;
2233   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]);
2234   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2235   o_nnz = d_nnz + m;
2236 
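  /* count how many column indices of each local row fall inside [cstart,cend) (diagonal block)
     versus outside it (off-diagonal block); this relies on the columns of each row being sorted */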
2237   for (i=0; i<m; i++) {
2238     nnz     = I[i+1]- I[i];
2239     JJ      = J + I[i];
2240     nnz_max = PetscMax(nnz_max,nnz);
2241     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2242     for (j=0; j<nnz; j++) {
2243       if (*JJ >= cstart) break;
2244       JJ++;
2245     }
2246     d = 0;
2247     for (; j<nnz; j++) {
2248       if (*JJ++ >= cend) break;
2249       d++;
2250     }
2251     d_nnz[i] = d;
2252     o_nnz[i] = nnz - d;
2253   }
2254   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2255   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2256 
2257   if (v) values = (PetscScalar*)v;
2258   else {
2259     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2260     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2261   }
2262 
2263   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2264   for (i=0; i<m; i++) {
2265     ii   = i + rstart;
2266     nnz  = I[i+1]- I[i];
2267     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2268   }
2269   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2271   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2272 
2273   if (!v) {
2274     ierr = PetscFree(values);CHKERRQ(ierr);
2275   }
2276   PetscFunctionReturn(0);
2277 }
2278 EXTERN_C_END
2279 
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2282 /*@C
2283    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2284    (the default parallel PETSc format).
2285 
2286    Collective on MPI_Comm
2287 
2288    Input Parameters:
2289 +  B - the matrix
2290 .  i - the indices into j for the start of each local row (starts with zero)
2291 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2292 -  v - optional values in the matrix
2293 
2294    Level: developer
2295 
2296 .keywords: matrix, aij, compressed row, sparse, parallel
2297 
2298 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2299 @*/
2300 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2301 {
2302   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2303 
2304   PetscFunctionBegin;
2305   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2306   if (f) {
2307     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2308   }
2309   PetscFunctionReturn(0);
2310 }
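
/*
   A minimal usage sketch (illustrative only; the sizes, indices and values below are assumptions,
   not taken from this file): each process supplies its local rows in CSR form with sorted,
   globally numbered column indices, here a single local row with three nonzeros of an
   8-column matrix.

     Mat         C;
     PetscInt    i[2] = {0,3};
     PetscInt    j[3] = {0,4,7};
     PetscScalar v[3] = {1.0,2.0,3.0};

     ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,1,PETSC_DECIDE,PETSC_DETERMINE,8);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(C,i,j,v);CHKERRQ(ierr);
*/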
2311 
2312 #undef __FUNCT__
2313 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2314 /*@C
2315    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2316    (the default parallel PETSc format).  For good matrix assembly performance
2317    the user should preallocate the matrix storage by setting the parameters
2318    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2319    performance can be increased by more than a factor of 50.
2320 
2321    Collective on MPI_Comm
2322 
2323    Input Parameters:
2324 +  A - the matrix
2325 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2326            (same value is used for all local rows)
2327 .  d_nnz - array containing the number of nonzeros in the various rows of the
2328            DIAGONAL portion of the local submatrix (possibly different for each row)
2329            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2330            The size of this array is equal to the number of local rows, i.e 'm'.
2331            You must leave room for the diagonal entry even if it is zero.
2332 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2333            submatrix (same value is used for all local rows).
2334 -  o_nnz - array containing the number of nonzeros in the various rows of the
2335            OFF-DIAGONAL portion of the local submatrix (possibly different for
2336            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2337            structure. The size of this array is equal to the number
2338            of local rows, i.e 'm'.
2339 
2340    If the *_nnz parameter is given then the *_nz parameter is ignored
2341 
2342    The AIJ format (also called the Yale sparse matrix format or
2343    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2344    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2345 
2346    The parallel matrix is partitioned such that the first m0 rows belong to
2347    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2348    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2349 
2350    The DIAGONAL portion of the local submatrix of a processor can be defined
2351    as the submatrix which is obtained by extracting the part corresponding
2352    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2353    first row that belongs to the processor, and r2 is the last row belonging
2354    to this processor. This is a square mxm matrix. The remaining portion
2355    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2356 
2357    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2358 
2359    Example usage:
2360 
2361    Consider the following 8x8 matrix with 34 non-zero values, that is
2362    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2363    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2364    as follows:
2365 
2366 .vb
2367             1  2  0  |  0  3  0  |  0  4
2368     Proc0   0  5  6  |  7  0  0  |  8  0
2369             9  0 10  | 11  0  0  | 12  0
2370     -------------------------------------
2371            13  0 14  | 15 16 17  |  0  0
2372     Proc1   0 18  0  | 19 20 21  |  0  0
2373             0  0  0  | 22 23  0  | 24  0
2374     -------------------------------------
2375     Proc2  25 26 27  |  0  0 28  | 29  0
2376            30  0  0  | 31 32 33  |  0 34
2377 .ve
2378 
2379    This can be represented as a collection of submatrices as:
2380 
2381 .vb
2382       A B C
2383       D E F
2384       G H I
2385 .ve
2386 
2387    Where the submatrices A,B,C are owned by proc0, D,E,F are
2388    owned by proc1, G,H,I are owned by proc2.
2389 
2390    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2391    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2392    The 'M','N' parameters are 8,8, and have the same values on all procs.
2393 
2394    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2395    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2396    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2397    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2398    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2399    matrix, and [DF] as another SeqAIJ matrix.
2400 
2401    When d_nz, o_nz parameters are specified, d_nz storage elements are
2402    allocated for every row of the local diagonal submatrix, and o_nz
2403    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2404    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2405    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2406    In this case, the values of d_nz,o_nz are:
2407 .vb
2408      proc0 : dnz = 2, o_nz = 2
2409      proc1 : dnz = 3, o_nz = 2
2410      proc2 : dnz = 1, o_nz = 4
2411 .ve
2412    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2413    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2414    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2415    34 values.
2416 
2417    When d_nnz, o_nnz parameters are specified, the storage is specified
2418    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2419    In the above case the values for d_nnz,o_nnz are:
2420 .vb
2421      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2422      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2423      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2424 .ve
2425    Here the space allocated is the sum of all the above values, i.e., 34, and
2426    hence the preallocation is exact.
2427 
2428    Level: intermediate
2429 
2430 .keywords: matrix, aij, compressed row, sparse, parallel
2431 
2432 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2433           MPIAIJ
2434 @*/
2435 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2436 {
2437   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2441   if (f) {
2442     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2443   }
2444   PetscFunctionReturn(0);
2445 }
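
/*
   A minimal usage sketch (illustrative only): this mirrors proc0 of the 8x8 example in the
   manual page above, which owns 3 rows and 3 "diagonal" columns with per-row counts
   d_nnz = {2,2,2} and o_nnz = {2,2,2}; the other processes would pass their own local
   sizes and counts.

     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     (then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd())
*/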
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatCreateMPIAIJ"
2449 /*@C
2450    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2451    (the default parallel PETSc format).  For good matrix assembly performance
2452    the user should preallocate the matrix storage by setting the parameters
2453    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2454    performance can be increased by more than a factor of 50.
2455 
2456    Collective on MPI_Comm
2457 
2458    Input Parameters:
2459 +  comm - MPI communicator
2460 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2461            This value should be the same as the local size used in creating the
2462            y vector for the matrix-vector product y = Ax.
2463 .  n - This value should be the same as the local size used in creating the
2464        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2465        calculated if N is given). For square matrices n is almost always m.
2466 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2467 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2468 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2469            (same value is used for all local rows)
2470 .  d_nnz - array containing the number of nonzeros in the various rows of the
2471            DIAGONAL portion of the local submatrix (possibly different for each row)
2472            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2473            The size of this array is equal to the number of local rows, i.e 'm'.
2474            You must leave room for the diagonal entry even if it is zero.
2475 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2476            submatrix (same value is used for all local rows).
2477 -  o_nnz - array containing the number of nonzeros in the various rows of the
2478            OFF-DIAGONAL portion of the local submatrix (possibly different for
2479            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2480            structure. The size of this array is equal to the number
2481            of local rows, i.e 'm'.
2482 
2483    Output Parameter:
2484 .  A - the matrix
2485 
2486    Notes:
2487    If the *_nnz parameter is given then the *_nz parameter is ignored
2488 
2489    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2490    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2491    storage requirements for this matrix.
2492 
2493    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2494    processor then it must be used on all processors that share the object for
2495    that argument.
2496 
2497    The user MUST specify either the local or global matrix dimensions
2498    (possibly both).
2499 
2500    The parallel matrix is partitioned such that the first m0 rows belong to
2501    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2502    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2503 
2504    The DIAGONAL portion of the local submatrix of a processor can be defined
2505    as the submatrix which is obtained by extracting the part corresponding
2506    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2507    first row that belongs to the processor, and r2 is the last row belonging
2508    to this processor. This is a square mxm matrix. The remaining portion
2509    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2510 
2511    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2512 
2513    When calling this routine with a single process communicator, a matrix of
2514    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2515    type of communicator, use the construction mechanism:
2516      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2517 
2518    By default, this format uses inodes (identical nodes) when possible.
2519    We search for consecutive rows with the same nonzero structure, thereby
2520    reusing matrix information to achieve increased efficiency.
2521 
2522    Options Database Keys:
2523 +  -mat_no_inode  - Do not use inodes
2524 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2525 -  -mat_aij_oneindex - Internally use indexing starting at 1
2526         rather than 0.  Note that when calling MatSetValues(),
2527         the user still MUST index entries starting at 0!
2528 
2529 
2530    Example usage:
2531 
2532    Consider the following 8x8 matrix with 34 non-zero values, that is
2533    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2534    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2535    as follows:
2536 
2537 .vb
2538             1  2  0  |  0  3  0  |  0  4
2539     Proc0   0  5  6  |  7  0  0  |  8  0
2540             9  0 10  | 11  0  0  | 12  0
2541     -------------------------------------
2542            13  0 14  | 15 16 17  |  0  0
2543     Proc1   0 18  0  | 19 20 21  |  0  0
2544             0  0  0  | 22 23  0  | 24  0
2545     -------------------------------------
2546     Proc2  25 26 27  |  0  0 28  | 29  0
2547            30  0  0  | 31 32 33  |  0 34
2548 .ve
2549 
2550    This can be represented as a collection of submatrices as:
2551 
2552 .vb
2553       A B C
2554       D E F
2555       G H I
2556 .ve
2557 
2558    Where the submatrices A,B,C are owned by proc0, D,E,F are
2559    owned by proc1, G,H,I are owned by proc2.
2560 
2561    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2562    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2563    The 'M','N' parameters are 8,8, and have the same values on all procs.
2564 
2565    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2566    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2567    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2568    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2569    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2570    matrix, and [DF] as another SeqAIJ matrix.
2571 
2572    When d_nz, o_nz parameters are specified, d_nz storage elements are
2573    allocated for every row of the local diagonal submatrix, and o_nz
2574    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2575    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2576    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2577    In this case, the values of d_nz,o_nz are:
2578 .vb
2579      proc0 : dnz = 2, o_nz = 2
2580      proc1 : dnz = 3, o_nz = 2
2581      proc2 : dnz = 1, o_nz = 4
2582 .ve
2583    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2584    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2585    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2586    34 values.
2587 
2588    When d_nnz, o_nnz parameters are specified, the storage is specified
2589    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2590    In the above case the values for d_nnz,o_nnz are:
2591 .vb
2592      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2593      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2594      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2595 .ve
2596    Here the space allocated is the sum of all the above values, i.e., 34, and
2597    hence the preallocation is exact.
2598 
2599    Level: intermediate
2600 
2601 .keywords: matrix, aij, compressed row, sparse, parallel
2602 
2603 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2604           MPIAIJ
2605 @*/
2606 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2607 {
2608   PetscErrorCode ierr;
2609   PetscMPIInt    size;
2610 
2611   PetscFunctionBegin;
2612   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2613   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2614   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2615   if (size > 1) {
2616     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2617     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2618   } else {
2619     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2620     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2621   }
2622   PetscFunctionReturn(0);
2623 }
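
/*
   A minimal usage sketch (illustrative only): creating the 8x8 matrix of the manual page
   above on 3 processes; proc0 is shown, passing its local sizes and the single-value
   estimates d_nz = 2, o_nz = 2, while the other ranks pass their own values.

     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     (then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd())
*/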
2624 
2625 #undef __FUNCT__
2626 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2627 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2628 {
2629   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2630 
2631   PetscFunctionBegin;
2632   *Ad     = a->A;
2633   *Ao     = a->B;
2634   *colmap = a->garray;
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 #undef __FUNCT__
2639 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2640 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2641 {
2642   PetscErrorCode ierr;
2643   PetscInt       i;
2644   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2645 
2646   PetscFunctionBegin;
2647   if (coloring->ctype == IS_COLORING_LOCAL) {
2648     ISColoringValue *allcolors,*colors;
2649     ISColoring      ocoloring;
2650 
2651     /* set coloring for diagonal portion */
2652     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2653 
2654     /* set coloring for off-diagonal portion */
2655     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2656     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2657     for (i=0; i<a->B->n; i++) {
2658       colors[i] = allcolors[a->garray[i]];
2659     }
2660     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2661     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2662     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2663     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2664   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2665     ISColoringValue *colors;
2666     PetscInt        *larray;
2667     ISColoring      ocoloring;
2668 
2669     /* set coloring for diagonal portion */
2670     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2671     for (i=0; i<a->A->n; i++) {
2672       larray[i] = i + a->cstart;
2673     }
2674     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2675     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2676     for (i=0; i<a->A->n; i++) {
2677       colors[i] = coloring->colors[larray[i]];
2678     }
2679     ierr = PetscFree(larray);CHKERRQ(ierr);
2680     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2681     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2682     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2683 
2684     /* set coloring for off-diagonal portion */
2685     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2686     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2687     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2688     for (i=0; i<a->B->n; i++) {
2689       colors[i] = coloring->colors[larray[i]];
2690     }
2691     ierr = PetscFree(larray);CHKERRQ(ierr);
2692     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2693     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2694     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2695   } else {
2696     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
2697   }
2698 
2699   PetscFunctionReturn(0);
2700 }
2701 
2702 #if defined(PETSC_HAVE_ADIC)
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2705 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2706 {
2707   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2708   PetscErrorCode ierr;
2709 
2710   PetscFunctionBegin;
2711   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2712   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 #endif
2716 
2717 #undef __FUNCT__
2718 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2719 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2720 {
2721   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2722   PetscErrorCode ierr;
2723 
2724   PetscFunctionBegin;
2725   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2726   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatMerge"
2732 /*@C
2733       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2734                  matrices contributed by each processor
2735 
2736     Collective on MPI_Comm
2737 
2738    Input Parameters:
2739 +    comm - the communicator the parallel matrix will live on
2740 .    inmat - the input sequential matrix (one per process); it is destroyed by this routine
2741 .    n - number of local columns (or PETSC_DECIDE)
2742 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2743 
2744    Output Parameter:
2745 .    outmat - the parallel matrix generated
2746 
2747     Level: advanced
2748 
2749    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2750 
2751 @*/
2752 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2753 {
2754   PetscErrorCode ierr;
2755   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2756   PetscInt       *indx;
2757   PetscScalar    *values;
2758   PetscMap       columnmap,rowmap;
2759 
2760   PetscFunctionBegin;
2761   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2762   /*
2763   PetscMPIInt       rank;
2764   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2765   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2766   */
2767   if (scall == MAT_INITIAL_MATRIX){
2768     /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
2769     if (n == PETSC_DECIDE){
2770       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2771       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2772       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2773       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2774       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2775     }
2776 
2777     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2778     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2779     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2780     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2781     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2782 
2783     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2784     for (i=0;i<m;i++) {
2785       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2786       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2787       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2788     }
2789     /* This routine will ONLY return MPIAIJ type matrix */
2790     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2791     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2792     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2793     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2794     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2795 
2796   } else if (scall == MAT_REUSE_MATRIX){
2797     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2798   } else {
2799     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2800   }
2801 
2802   for (i=0;i<m;i++) {
2803     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2804     I    = i + rstart;
2805     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2806     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2807   }
2808   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2809   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2810   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2811 
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatFileSplit"
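/*
   MatFileSplit - Writes each process's local rows of the parallel matrix A to its own
   sequential binary file named <outfile>.<rank>, by copying the local rows into a temporary
   SeqAIJ matrix and viewing it with a binary viewer.
*/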
2817 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2818 {
2819   PetscErrorCode    ierr;
2820   PetscMPIInt       rank;
2821   PetscInt          m,N,i,rstart,nnz;
2822   size_t            len;
2823   const PetscInt    *indx;
2824   PetscViewer       out;
2825   char              *name;
2826   Mat               B;
2827   const PetscScalar *values;
2828 
2829   PetscFunctionBegin;
2830   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2831   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2832   /* Should this be the type of the diagonal block of A? */
2833   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2834   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2835   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2836   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2837   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2838   for (i=0;i<m;i++) {
2839     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2840     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2841     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2842   }
2843   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2844   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2845 
2846   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2847   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2848   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2849   sprintf(name,"%s.%d",outfile,rank);
2850   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2851   ierr = PetscFree(name);CHKERRQ(ierr);
2852   ierr = MatView(B,out);CHKERRQ(ierr);
2853   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2854   ierr = MatDestroy(B);CHKERRQ(ierr);
2855   PetscFunctionReturn(0);
2856 }
2857 
2858 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2859 #undef __FUNCT__
2860 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2861 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2862 {
2863   PetscErrorCode       ierr;
2864   Mat_Merge_SeqsToMPI  *merge=PETSC_NULL;
2865   PetscObjectContainer container;
2866 
2867   PetscFunctionBegin;
2868   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2869   if (container) {
2870     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2871     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2872     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2873     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2874     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2875     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2876     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2877     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2878     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2879     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2880     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2881     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2882 
2883     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2884     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2885   }
2886   if (merge) {ierr = PetscFree(merge);CHKERRQ(ierr);}
2887 
2888   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2889   PetscFunctionReturn(0);
2890 }
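/*
   Editorial note (assumption-level sketch, variable names illustrative): the destroy routine
   above relies on the PetscObjectContainer mechanism that MatMerge_SeqsToMPISymbolic() uses
   further below to attach its supporting Mat_Merge_SeqsToMPI structure to the parallel matrix.
   The general pattern, using only calls that appear in this file, is roughly

       PetscObjectContainer container;
       ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
       ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
       ierr = PetscObjectCompose((PetscObject)B,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
       ...
       ierr = PetscObjectQuery((PetscObject)B,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
       if (container) {ierr = PetscObjectContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);}

   so any routine handed the merged matrix can recover the merge data by name.
*/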
2891 
2892 #include "src/mat/utils/freespace.h"
2893 #include "petscbt.h"
2894 static PetscEvent logkey_seqstompinum = 0;
2895 #undef __FUNCT__
2896 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2897 /*@C
2898       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2899                  matrices from each processor
2900 
2901     Collective on MPI_Comm
2902 
2903    Input Parameters:
2904 +    comm - the communicator the parallel matrix will live on
2905 .    seqmat - the input sequential matrix (one per process)
2906 .    m - number of local rows (or PETSC_DECIDE)
2907 .    n - number of local columns (or PETSC_DECIDE)
2908 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2909 
2910    Output Parameter:
2911 .    mpimat - the parallel matrix generated
2912 
2913     Level: advanced
2914 
2915    Notes:
2916      The dimensions of the sequential matrix on each processor MUST be the same.
2917      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2918      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2919 @*/
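/*
   Editorial usage sketch (not part of the library source): the symbolic/numeric split lets the
   parallel structure be computed once and the numerical values be re-merged repeatedly.  A
   hedged example of the driver routine, given an MPI communicator comm:

       Mat seqmat,mpimat;
       ...                                              assemble seqmat locally on each process
       ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
       ...                                              change only the numerical values of seqmat
       ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);

   The MAT_INITIAL_MATRIX call performs the symbolic phase and attaches the supporting
   Mat_Merge_SeqsToMPI structure to mpimat; MAT_REUSE_MATRIX reuses that structure and only
   redoes the numeric merge.
*/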
2920 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2921 {
2922   PetscErrorCode       ierr;
2923   MPI_Comm             comm=mpimat->comm;
2924   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2925   PetscMPIInt          size,rank,taga,*len_s;
2926   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2927   PetscInt             proc,m;
2928   PetscInt             **buf_ri,**buf_rj;
2929   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2930   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2931   MPI_Request          *s_waits,*r_waits;
2932   MPI_Status           *status;
2933   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2934   Mat_Merge_SeqsToMPI  *merge;
2935   PetscObjectContainer container;
2936 
2937   PetscFunctionBegin;
2938   if (!logkey_seqstompinum) {
2939     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2940   }
2941   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2942 
2943   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2944   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2945 
2946   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2947   if (container) {
2948     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2949   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
2950   bi     = merge->bi;
2951   bj     = merge->bj;
2952   buf_ri = merge->buf_ri;
2953   buf_rj = merge->buf_rj;
2954 
2955   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2956   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2957   len_s  = merge->len_s;
2958 
2959   /* send and recv matrix values */
2960   /*-----------------------------*/
2961   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2962   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2963 
2964   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2965   for (proc=0,k=0; proc<size; proc++){
2966     if (!len_s[proc]) continue;
2967     i = owners[proc];
2968     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2969     k++;
2970   }
2971 
2972   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
2973   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
2974   ierr = PetscFree(status);CHKERRQ(ierr);
2975 
2976   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2977   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2978 
2979   /* insert mat values of mpimat */
2980   /*----------------------------*/
2981   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2982   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2983   nextrow = buf_ri_k + merge->nrecv;
2984   nextai  = nextrow + merge->nrecv;
2985 
2986   for (k=0; k<merge->nrecv; k++){
2987     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2988     nrows = *(buf_ri_k[k]);
2989     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
2990     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
2991   }
2992 
2993   /* set values of ba */
2994   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2995   for (i=0; i<m; i++) {
2996     arow = owners[rank] + i;
2997     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2998     bnzi = bi[i+1] - bi[i];
2999     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3000 
3001     /* add local non-zero vals of this proc's seqmat into ba */
3002     anzi = ai[arow+1] - ai[arow];
3003     aj   = a->j + ai[arow];
3004     aa   = a->a + ai[arow];
3005     nextaj = 0;
3006     for (j=0; nextaj<anzi; j++){
3007       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3008         ba_i[j] += aa[nextaj++];
3009       }
3010     }
3011 
3012     /* add received vals into ba */
3013     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3014       /* i-th row */
3015       if (i == *nextrow[k]) {
3016         anzi = *(nextai[k]+1) - *nextai[k];
3017         aj   = buf_rj[k] + *(nextai[k]);
3018         aa   = abuf_r[k] + *(nextai[k]);
3019         nextaj = 0;
3020         for (j=0; nextaj<anzi; j++){
3021           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3022             ba_i[j] += aa[nextaj++];
3023           }
3024         }
3025         nextrow[k]++; nextai[k]++;
3026       }
3027     }
3028     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3029   }
3030   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3031   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3032 
3033   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3034   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3035   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3036   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 static PetscEvent logkey_seqstompisym = 0;
3041 #undef __FUNCT__
3042 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3043 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3044 {
3045   PetscErrorCode       ierr;
3046   Mat                  B_mpi;
3047   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3048   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3049   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3050   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3051   PetscInt             len,proc,*dnz,*onz;
3052   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3053   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3054   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3055   MPI_Status           *status;
3056   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3057   PetscBT              lnkbt;
3058   Mat_Merge_SeqsToMPI  *merge;
3059   PetscObjectContainer container;
3060 
3061   PetscFunctionBegin;
3062   if (!logkey_seqstompisym) {
3063     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3064   }
3065   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3066 
3067   /* make sure it is a PETSc comm */
3068   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3070   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3071 
3072   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3073   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3074 
3075   /* determine row ownership */
3076   /*---------------------------------------------------------*/
3077   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3078   if (m == PETSC_DECIDE) {
3079     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3080   } else {
3081     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3082   }
3083   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3084   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3085   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3086 
3087   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3088   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3089 
3090   /* determine the number of messages to send, their lengths */
3091   /*---------------------------------------------------------*/
3092   len_s  = merge->len_s;
3093 
3094   len = 0;  /* length of buf_si[] */
3095   merge->nsend = 0;
3096   for (proc=0; proc<size; proc++){
3097     len_si[proc] = 0;
3098     if (proc == rank){
3099       len_s[proc] = 0;
3100     } else {
3101       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3102       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3103     }
3104     if (len_s[proc]) {
3105       merge->nsend++;
3106       nrows = 0;
3107       for (i=owners[proc]; i<owners[proc+1]; i++){
3108         if (ai[i+1] > ai[i]) nrows++;
3109       }
3110       len_si[proc] = 2*(nrows+1);
3111       len += len_si[proc];
3112     }
3113   }
3114 
3115   /* determine the number and length of messages to receive for ij-structure */
3116   /*-------------------------------------------------------------------------*/
3117   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3118   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3119 
3120   /* post the Irecv of j-structure */
3121   /*-------------------------------*/
3122   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3123   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3124 
3125   /* post the Isend of j-structure */
3126   /*--------------------------------*/
3127   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3128   sj_waits = si_waits + merge->nsend;
3129 
3130   for (proc=0, k=0; proc<size; proc++){
3131     if (!len_s[proc]) continue;
3132     i = owners[proc];
3133     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3134     k++;
3135   }
3136 
3137   /* receives and sends of j-structure are complete */
3138   /*------------------------------------------------*/
3139   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3140   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3141 
3142   /* send and recv i-structure */
3143   /*---------------------------*/
3144   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3145   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3146 
3147   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3148   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3149   for (proc=0,k=0; proc<size; proc++){
3150     if (!len_s[proc]) continue;
3151     /* form outgoing message for i-structure:
3152          buf_si[0]:                 nrows to be sent
3153                [1:nrows]:           row index (local to the receiving process)
3154                [nrows+1:2*nrows+1]: i-structure index
3155     */
3156     /*-------------------------------------------*/
3157     nrows = len_si[proc]/2 - 1;
3158     buf_si_i    = buf_si + nrows+1;
3159     buf_si[0]   = nrows;
3160     buf_si_i[0] = 0;
3161     nrows = 0;
3162     for (i=owners[proc]; i<owners[proc+1]; i++){
3163       anzi = ai[i+1] - ai[i];
3164       if (anzi) {
3165         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3166         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3167         nrows++;
3168       }
3169     }
3170     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3171     k++;
3172     buf_si += len_si[proc];
3173   }
3174 
3175   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3176   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3177 
3178   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3179   for (i=0; i<merge->nrecv; i++){
3180     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3181   }
3182 
3183   ierr = PetscFree(len_si);CHKERRQ(ierr);
3184   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3185   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3186   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3187   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3188   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3189   ierr = PetscFree(status);CHKERRQ(ierr);
3190 
3191   /* compute a local seq matrix in each processor */
3192   /*----------------------------------------------*/
3193   /* allocate bi array and free space for accumulating nonzero column info */
3194   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3195   bi[0] = 0;
3196 
3197   /* create and initialize a linked list */
3198   nlnk = N+1;
3199   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3200 
3201   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3202   len = 0;
3203   len  = ai[owners[rank+1]] - ai[owners[rank]];
3204   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3205   current_space = free_space;
3206 
3207   /* determine symbolic info for each local row */
3208   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3209   nextrow = buf_ri_k + merge->nrecv;
3210   nextai  = nextrow + merge->nrecv;
3211   for (k=0; k<merge->nrecv; k++){
3212     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3213     nrows = *buf_ri_k[k];
3214     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
3215     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
3216   }
3217 
3218   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3219   len = 0;
3220   for (i=0;i<m;i++) {
3221     bnzi   = 0;
3222     /* add local non-zero cols of this proc's seqmat into lnk */
3223     arow   = owners[rank] + i;
3224     anzi   = ai[arow+1] - ai[arow];
3225     aj     = a->j + ai[arow];
3226     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3227     bnzi += nlnk;
3228     /* add received col data into lnk */
3229     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3230       if (i == *nextrow[k]) { /* i-th row */
3231         anzi = *(nextai[k]+1) - *nextai[k];
3232         aj   = buf_rj[k] + *nextai[k];
3233         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3234         bnzi += nlnk;
3235         nextrow[k]++; nextai[k]++;
3236       }
3237     }
3238     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3239 
3240     /* if free space is not available, make more free space */
3241     if (current_space->local_remaining<bnzi) {
3242       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3243       nspacedouble++;
3244     }
3245     /* copy data into free space, then initialize lnk */
3246     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3247     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3248 
3249     current_space->array           += bnzi;
3250     current_space->local_used      += bnzi;
3251     current_space->local_remaining -= bnzi;
3252 
3253     bi[i+1] = bi[i] + bnzi;
3254   }
3255 
3256   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3257 
3258   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3259   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3260   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3261 
3262   /* create symbolic parallel matrix B_mpi */
3263   /*---------------------------------------*/
3264   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3265   if (n==PETSC_DECIDE) {
3266     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3267   } else {
3268     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3269   }
3270   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3271   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3272   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3273 
3274   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3275   B_mpi->assembled     = PETSC_FALSE;
3276   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3277   merge->bi            = bi;
3278   merge->bj            = bj;
3279   merge->buf_ri        = buf_ri;
3280   merge->buf_rj        = buf_rj;
3281   merge->coi           = PETSC_NULL;
3282   merge->coj           = PETSC_NULL;
3283   merge->owners_co     = PETSC_NULL;
3284 
3285   /* attach the supporting struct to B_mpi for reuse */
3286   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3287   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3288   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3289   *mpimat = B_mpi;
3290 
3291   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3292   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3293   PetscFunctionReturn(0);
3294 }
3295 
3296 static PetscEvent logkey_seqstompi = 0;
3297 #undef __FUNCT__
3298 #define __FUNCT__ "MatMerge_SeqsToMPI"
3299 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3300 {
3301   PetscErrorCode   ierr;
3302 
3303   PetscFunctionBegin;
3304   if (!logkey_seqstompi) {
3305     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3306   }
3307   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3308   if (scall == MAT_INITIAL_MATRIX){
3309     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3310   }
3311   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3312   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3313   PetscFunctionReturn(0);
3314 }
3315 static PetscEvent logkey_getlocalmat = 0;
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatGetLocalMat"
3318 /*@C
3319      MatGetLocalMat - Creates a SeqAIJ matrix by taking all of A's local rows
3320 
3321     Not Collective
3322 
3323    Input Parameters:
3324 +    A - the matrix
3325 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3326 
3327    Output Parameter:
3328 .    A_loc - the local sequential matrix generated
3329 
3330     Level: developer
3331 
3332 @*/
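/*
   Editorial usage sketch (not part of the library source): a hedged example of obtaining the
   local rows of an MPIAIJ matrix A as a SeqAIJ matrix, and refreshing the values later after
   A has been re-assembled with the same nonzero pattern.

       Mat A_loc;
       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
       ...                                              A changes numerically, same pattern
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
       ierr = MatDestroy(A_loc);CHKERRQ(ierr);

   The i/j/a arrays of A_loc are owned by A_loc itself (freedata is set below), so destroying
   A_loc releases them.
*/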
3333 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3334 {
3335   PetscErrorCode  ierr;
3336   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3337   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3338   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3339   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3340   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3341   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3342 
3343   PetscFunctionBegin;
3344   if (!logkey_getlocalmat) {
3345     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3346   }
3347   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3348   if (scall == MAT_INITIAL_MATRIX){
3349     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3350     ci[0] = 0;
3351     for (i=0; i<am; i++){
3352       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3353     }
3354     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3355     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3356     k = 0;
3357     for (i=0; i<am; i++) {
3358       ncols_o = bi[i+1] - bi[i];
3359       ncols_d = ai[i+1] - ai[i];
3360       /* off-diagonal portion of A */
3361       for (jo=0; jo<ncols_o; jo++) {
3362         col = cmap[*bj];
3363         if (col >= cstart) break;
3364         cj[k]   = col; bj++;
3365         ca[k++] = *ba++;
3366       }
3367       /* diagonal portion of A */
3368       for (j=0; j<ncols_d; j++) {
3369         cj[k]   = cstart + *aj++;
3370         ca[k++] = *aa++;
3371       }
3372       /* off-diagonal portion of A */
3373       for (j=jo; j<ncols_o; j++) {
3374         cj[k]   = cmap[*bj++];
3375         ca[k++] = *ba++;
3376       }
3377     }
3378     /* put together the new matrix */
3379     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3380     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3381     /* Since these are PETSc arrays, change flags to free them as necessary. */
3382     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3383     mat->freedata = PETSC_TRUE;
3384     mat->nonew    = 0;
3385   } else if (scall == MAT_REUSE_MATRIX){
3386     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3387     ci = mat->i; cj = mat->j; ca = mat->a;
3388     for (i=0; i<am; i++) {
3389       /* off-diagonal portion of A */
3390       ncols_o = bi[i+1] - bi[i];
3391       for (jo=0; jo<ncols_o; jo++) {
3392         col = cmap[*bj];
3393         if (col >= cstart) break;
3394         *ca++ = *ba++; bj++;
3395       }
3396       /* diagonal portion of A */
3397       ncols_d = ai[i+1] - ai[i];
3398       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3399       /* off-diagonal portion of A */
3400       for (j=jo; j<ncols_o; j++) {
3401         *ca++ = *ba++; bj++;
3402       }
3403     }
3404   } else {
3405     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3406   }
3407 
3408   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 static PetscEvent logkey_getlocalmatcondensed = 0;
3413 #undef __FUNCT__
3414 #define __FUNCT__ "MatGetLocalMatCondensed"
3415 /*@C
3416      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all of A's local rows and NON-ZERO columns
3417 
3418     Not Collective
3419 
3420    Input Parameters:
3421 +    A - the matrix
3422 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3423 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3424 
3425    Output Parameter:
3426 .    A_loc - the local sequential matrix generated
3427 
3428     Level: developer
3429 
3430 @*/
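/*
   Editorial usage sketch (not part of the library source): passing PETSC_NULL for the row and
   column index sets lets the routine pick all local rows and all columns that actually carry
   nonzeros (the local columns plus the off-diagonal ones listed in garray).

       Mat A_loc;
       ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/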
3431 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3432 {
3433   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3434   PetscErrorCode    ierr;
3435   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3436   IS                isrowa,iscola;
3437   Mat               *aloc;
3438 
3439   PetscFunctionBegin;
3440   if (!logkey_getlocalmatcondensed) {
3441     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3442   }
3443   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3444   if (!row){
3445     start = a->rstart; end = a->rend;
3446     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3447   } else {
3448     isrowa = *row;
3449   }
3450   if (!col){
3451     start = a->cstart;
3452     cmap  = a->garray;
3453     nzA   = a->A->n;
3454     nzB   = a->B->n;
3455     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3456     ncols = 0;
3457     for (i=0; i<nzB; i++) {
3458       if (cmap[i] < start) idx[ncols++] = cmap[i];
3459       else break;
3460     }
3461     imark = i;
3462     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3463     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3464     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3465     ierr = PetscFree(idx);CHKERRQ(ierr);
3466   } else {
3467     iscola = *col;
3468   }
3469   if (scall != MAT_INITIAL_MATRIX){
3470     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3471     aloc[0] = *A_loc;
3472   }
3473   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3474   *A_loc = aloc[0];
3475   ierr = PetscFree(aloc);CHKERRQ(ierr);
3476   if (!row){
3477     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3478   }
3479   if (!col){
3480     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3481   }
3482   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3483   PetscFunctionReturn(0);
3484 }
3485 
3486 static PetscEvent logkey_GetBrowsOfAcols = 0;
3487 #undef __FUNCT__
3488 #define __FUNCT__ "MatGetBrowsOfAcols"
3489 /*@C
3490     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3491 
3492     Collective on Mat
3493 
3494    Input Parameters:
3495 +    A,B - the matrices in mpiaij format
3496 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3497 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3498 
3499    Output Parameters:
3500 +    rowb, colb - index sets of the rows and columns of B that were extracted
3501 .    brstart - row index in B_seq at which the rows coming from B's own local rows begin
3502 -    B_seq - the sequential matrix generated
3503 
3504     Level: developer
3505 
3506 @*/
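/*
   Editorial usage sketch (not part of the library source): a hedged example that lets the
   routine build the row and column index sets on the first call and then reuses them (and the
   extracted matrix) on later calls.

       IS       rowb = PETSC_NULL,colb = PETSC_NULL;
       PetscInt brstart;
       Mat      B_seq;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ...                                              B changes numerically, same pattern
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ierr = ISDestroy(rowb);CHKERRQ(ierr);
       ierr = ISDestroy(colb);CHKERRQ(ierr);
       ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/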
3507 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3508 {
3509   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3510   PetscErrorCode    ierr;
3511   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3512   IS                isrowb,iscolb;
3513   Mat               *bseq;
3514 
3515   PetscFunctionBegin;
3516   if (a->cstart != b->rstart || a->cend != b->rend){
3517     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3518   }
3519   if (!logkey_GetBrowsOfAcols) {
3520     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3521   }
3522   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3523 
3524   if (scall == MAT_INITIAL_MATRIX){
3525     start = a->cstart;
3526     cmap  = a->garray;
3527     nzA   = a->A->n;
3528     nzB   = a->B->n;
3529     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3530     ncols = 0;
3531     for (i=0; i<nzB; i++) {  /* row < local row index */
3532       if (cmap[i] < start) idx[ncols++] = cmap[i];
3533       else break;
3534     }
3535     imark = i;
3536     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3537     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3538     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3539     ierr = PetscFree(idx);CHKERRQ(ierr);
3540     *brstart = imark;
3541     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3542   } else {
3543     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3544     isrowb = *rowb; iscolb = *colb;
3545     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3546     bseq[0] = *B_seq;
3547   }
3548   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3549   *B_seq = bseq[0];
3550   ierr = PetscFree(bseq);CHKERRQ(ierr);
3551   if (!rowb){
3552     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3553   } else {
3554     *rowb = isrowb;
3555   }
3556   if (!colb){
3557     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3558   } else {
3559     *colb = iscolb;
3560   }
3561   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3562   PetscFunctionReturn(0);
3563 }
3564 
3565 static PetscEvent logkey_GetBrowsOfAocols = 0;
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatGetBrowsOfAoCols"
3568 /*@C
3569     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3570     of the OFF-DIAGONAL portion of local A
3571 
3572     Collective on Mat
3573 
3574    Input Parameters:
3575 +    A,B - the matrices in mpiaij format
3576 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3577 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3578 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3579 
3580    Output Parameter:
3581 .    B_oth - the sequential matrix generated
3582 
3583     Level: developer
3584 
3585 @*/
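/*
   Editorial usage sketch (not part of the library source): startsj and bufa_ptr are scratch
   communication data that the routine fills on the first call so that a MAT_REUSE_MATRIX call
   can skip the i/j exchange and only resend the numerical values.

       PetscInt    *startsj = PETSC_NULL;
       PetscScalar *bufa    = PETSC_NULL;
       Mat         B_oth;
       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
       ...                                              B changes numerically, same pattern
       ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = PetscFree(startsj);CHKERRQ(ierr);
       ierr = PetscFree(bufa);CHKERRQ(ierr);
       ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/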
3586 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3587 {
3588   VecScatter_MPI_General *gen_to,*gen_from;
3589   PetscErrorCode         ierr;
3590   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3591   Mat_SeqAIJ             *b_oth;
3592   VecScatter             ctx=a->Mvctx;
3593   MPI_Comm               comm=ctx->comm;
3594   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3595   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3596   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3597   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3598   MPI_Request            *rwaits,*swaits;
3599   MPI_Status             *sstatus,rstatus;
3600   PetscInt               *cols;
3601   PetscScalar            *vals;
3602   PetscMPIInt            j;
3603 
3604   PetscFunctionBegin;
3605   if (a->cstart != b->rstart || a->cend != b->rend){
3606     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3607   }
3608   if (!logkey_GetBrowsOfAocols) {
3609     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3610   }
3611   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3612   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3613 
3614   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3615   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3616   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3617   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3618   nrecvs   = gen_from->n;
3619   nsends   = gen_to->n;
3620   rwaits   = gen_from->requests;
3621   swaits   = gen_to->requests;
3622   srow     = gen_to->indices;   /* local row index to be sent */
3623   rstarts  = gen_from->starts;
3624   sstarts  = gen_to->starts;
3625   rprocs   = gen_from->procs;
3626   sprocs   = gen_to->procs;
3627   sstatus  = gen_to->sstatus;
3628 
3629   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3630   if (scall == MAT_INITIAL_MATRIX){
3631     /* i-array */
3632     /*---------*/
3633     /*  post receives */
3634     for (i=0; i<nrecvs; i++){
3635       rowlen = (PetscInt*)rvalues + rstarts[i];
3636       nrows = rstarts[i+1]-rstarts[i];
3637       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3638     }
3639 
3640     /* pack the outgoing message */
3641     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3642     rstartsj = sstartsj + nsends +1;
3643     sstartsj[0] = 0;  rstartsj[0] = 0;
3644     len = 0; /* total length of j or a array to be sent */
3645     k = 0;
3646     for (i=0; i<nsends; i++){
3647       rowlen = (PetscInt*)svalues + sstarts[i];
3648       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3649       for (j=0; j<nrows; j++) {
3650         row = srow[k] + b->rowners[rank]; /* global row idx */
3651         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3652         len += rowlen[j];
3653         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3654         k++;
3655       }
3656       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3657       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3658     }
3659     /* recvs and sends of i-array are completed */
3660     i = nrecvs;
3661     while (i--) {
3662       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3663     }
3664     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3665     /* allocate buffers for sending j and a arrays */
3666     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3667     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3668 
3669     /* create i-array of B_oth */
3670     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3671     b_othi[0] = 0;
3672     len = 0; /* total length of j or a array to be received */
3673     k = 0;
3674     for (i=0; i<nrecvs; i++){
3675       rowlen = (PetscInt*)rvalues + rstarts[i];
3676       nrows = rstarts[i+1]-rstarts[i];
3677       for (j=0; j<nrows; j++) {
3678         b_othi[k+1] = b_othi[k] + rowlen[j];
3679         len += rowlen[j]; k++;
3680       }
3681       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3682     }
3683 
3684     /* allocate space for j and a arrays of B_oth */
3685     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3686     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3687 
3688     /* j-array */
3689     /*---------*/
3690     /*  post receives of j-array */
3691     for (i=0; i<nrecvs; i++){
3692       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3693       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3694     }
3695     k = 0;
3696     for (i=0; i<nsends; i++){
3697       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3698       bufJ = bufj+sstartsj[i];
3699       for (j=0; j<nrows; j++) {
3700         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3701         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3702         for (l=0; l<ncols; l++){
3703           *bufJ++ = cols[l];
3704         }
3705         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3706       }
3707       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3708     }
3709 
3710     /* recvs and sends of j-array are completed */
3711     i = nrecvs;
3712     while (i--) {
3713       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3714     }
3715     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3716   } else if (scall == MAT_REUSE_MATRIX){
3717     sstartsj = *startsj;
3718     rstartsj = sstartsj + nsends +1;
3719     bufa     = *bufa_ptr;
3720     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3721     b_otha   = b_oth->a;
3722   } else {
3723     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3724   }
3725 
3726   /* a-array */
3727   /*---------*/
3728   /*  post receives of a-array */
3729   for (i=0; i<nrecvs; i++){
3730     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3731     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3732   }
3733   k = 0;
3734   for (i=0; i<nsends; i++){
3735     nrows = sstarts[i+1]-sstarts[i];
3736     bufA = bufa+sstartsj[i];
3737     for (j=0; j<nrows; j++) {
3738       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3739       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3740       for (l=0; l<ncols; l++){
3741         *bufA++ = vals[l];
3742       }
3743       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3744 
3745     }
3746     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3747   }
3748   /* recvs and sends of a-array are completed */
3749   i = nrecvs;
3750   while (i--) {
3751     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3752   }
3753   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3754 
3755   if (scall == MAT_INITIAL_MATRIX){
3756     /* put together the new matrix */
3757     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3758 
3759     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3760     /* Since these are PETSc arrays, change flags to free them as necessary. */
3761     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3762     b_oth->freedata = PETSC_TRUE;
3763     b_oth->nonew    = 0;
3764 
3765     ierr = PetscFree(bufj);CHKERRQ(ierr);
3766     if (!startsj || !bufa_ptr){
3767       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3768       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3769     } else {
3770       *startsj  = sstartsj;
3771       *bufa_ptr = bufa;
3772     }
3773   }
3774   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3775 
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 #undef __FUNCT__
3780 #define __FUNCT__ "MatGetCommunicationStructs"
3781 /*@C
3782   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3783 
3784   Not Collective
3785 
3786   Input Parameters:
3787 . A - The matrix in mpiaij format
3788 
3789   Output Parameter:
3790 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3791 . colmap - A map from global column index to local index into lvec
3792 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3793 
3794   Level: developer
3795 
3796 @*/
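/*
   Editorial usage sketch (not part of the library source): the colmap argument has a different
   type depending on whether PETSc was configured with PETSC_USE_CTABLE, so callers typically
   mirror the same #if test.

       Vec        lvec;
       VecScatter mvctx;
   #if defined (PETSC_USE_CTABLE)
       PetscTable colmap;
   #else
       PetscInt   *colmap;
   #endif
       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);

   The returned objects are the matrix's internal structures, so the caller should not destroy
   or free them.
*/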
3797 #if defined (PETSC_USE_CTABLE)
3798 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3799 #else
3800 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3801 #endif
3802 {
3803   Mat_MPIAIJ *a;
3804 
3805   PetscFunctionBegin;
3806   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3807   PetscValidPointer(lvec, 2);
3808   PetscValidPointer(colmap, 3);
3809   PetscValidPointer(multScatter, 4);
3810   a = (Mat_MPIAIJ *) A->data;
3811   if (lvec) *lvec = a->lvec;
3812   if (colmap) *colmap = a->colmap;
3813   if (multScatter) *multScatter = a->Mvctx;
3814   PetscFunctionReturn(0);
3815 }
3816 
3817 /*MC
3818    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3819 
3820    Options Database Keys:
3821 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3822 
3823   Level: beginner
3824 
3825 .seealso: MatCreateMPIAIJ
3826 M*/
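/*
   Editorial usage sketch (not part of the library source): a hedged example of creating an
   MPIAIJ matrix programmatically, where M and N are the global sizes and the preallocation
   numbers (5 and 2) are illustrative only.

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, call MatSetFromOptions() in place of MatSetType() and select the type at run
   time with -mat_type mpiaij, as listed in the options table above.
*/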
3827 
3828 EXTERN_C_BEGIN
3829 #undef __FUNCT__
3830 #define __FUNCT__ "MatCreate_MPIAIJ"
3831 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3832 {
3833   Mat_MPIAIJ     *b;
3834   PetscErrorCode ierr;
3835   PetscInt       i;
3836   PetscMPIInt    size;
3837 
3838   PetscFunctionBegin;
3839   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3840 
3841   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3842   B->data         = (void*)b;
3843   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3844   B->factor       = 0;
3845   B->bs           = 1;
3846   B->assembled    = PETSC_FALSE;
3847   B->mapping      = 0;
3848 
3849   B->insertmode      = NOT_SET_VALUES;
3850   b->size            = size;
3851   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3852 
3853   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3854   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3855 
3856   /* the information in the maps duplicates the information computed below, eventually
3857      we should remove the duplicate information that is not contained in the maps */
3858   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3859   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3860 
3861   /* build local table of row and column ownerships */
3862   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3863   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3864   b->cowners = b->rowners + b->size + 2;
3865   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3866   b->rowners[0] = 0;
3867   for (i=2; i<=b->size; i++) {
3868     b->rowners[i] += b->rowners[i-1];
3869   }
3870   b->rstart = b->rowners[b->rank];
3871   b->rend   = b->rowners[b->rank+1];
3872   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3873   b->cowners[0] = 0;
3874   for (i=2; i<=b->size; i++) {
3875     b->cowners[i] += b->cowners[i-1];
3876   }
3877   b->cstart = b->cowners[b->rank];
3878   b->cend   = b->cowners[b->rank+1];
3879 
3880   /* build cache for off array entries formed */
3881   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3882   b->donotstash  = PETSC_FALSE;
3883   b->colmap      = 0;
3884   b->garray      = 0;
3885   b->roworiented = PETSC_TRUE;
3886 
3887   /* stuff used for matrix vector multiply */
3888   b->lvec      = PETSC_NULL;
3889   b->Mvctx     = PETSC_NULL;
3890 
3891   /* stuff for MatGetRow() */
3892   b->rowindices   = 0;
3893   b->rowvalues    = 0;
3894   b->getrowactive = PETSC_FALSE;
3895 
3896   /* Explicitly create 2 MATSEQAIJ matrices. */
3897   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3898   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
3899   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3900   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3901   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3902   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
3903   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3904   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3905 
3906   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3907                                      "MatStoreValues_MPIAIJ",
3908                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3909   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3910                                      "MatRetrieveValues_MPIAIJ",
3911                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3912   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3913 				     "MatGetDiagonalBlock_MPIAIJ",
3914                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3915   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3916 				     "MatIsTranspose_MPIAIJ",
3917 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3918   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3919 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3920 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3921   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3922 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3923 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3924   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3925 				     "MatDiagonalScaleLocal_MPIAIJ",
3926 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3927   PetscFunctionReturn(0);
3928 }
3929 EXTERN_C_END
3930