xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision cc310fdddfd1255ff354b307e63afef1e66c265d)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petsc-private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
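
   A minimal creation sketch (illustrative; m, d_nz, and o_nz are placeholders for the local
   row count and the estimated per-row nonzero counts in the diagonal and off-diagonal blocks):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve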
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. These matrices also automatically switch over to using inode routines when
21    enough inodes (consecutive rows with identical nonzero structure) exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
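/*
   MatFindNonzeroRows_MPIAIJ - builds an index set of the global numbers of the locally owned rows
   that contain at least one explicitly stored nonzero value; *keptrows is left NULL when no
   process owns an all-zero row, i.e. when every row would be kept.
*/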
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
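  /* work has one entry per global column; the partial sums (or maxima) accumulated below are
     combined across all ranks by the MPI_Allreduce() further down */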
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatDistribute_MPIAIJ"
193 /*
194     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
195     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
196 
197     Only for square matrices
198 
199     Used by a preconditioner, hence PETSC_EXTERN
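
    A minimal calling sketch (illustrative only; gmat is a sequential AIJ matrix whose values on
    rank 0 are the ones that get distributed, and m is the number of rows this rank is to own):

       Mat dmat;
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
       ...  change the numerical values of gmat on rank 0  ...
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);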
200 */
201 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
202 {
203   PetscMPIInt    rank,size;
204   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
205   PetscErrorCode ierr;
206   Mat            mat;
207   Mat_SeqAIJ     *gmata;
208   PetscMPIInt    tag;
209   MPI_Status     status;
210   PetscBool      aij;
211   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
212 
213   PetscFunctionBegin;
214   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
215   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
216   if (!rank) {
217     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
218     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
219   }
220   if (reuse == MAT_INITIAL_MATRIX) {
221     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
222     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
223     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
224     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
225     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
226     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
227     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
228     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
229     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
230 
231     rowners[0] = 0;
232     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
233     rstart = rowners[rank];
234     rend   = rowners[rank+1];
235     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
236     if (!rank) {
237       gmata = (Mat_SeqAIJ*) gmat->data;
238       /* send row lengths to all processors */
239       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
240       for (i=1; i<size; i++) {
241         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
242       }
243       /* determine the diagonal and off-diagonal entry counts for each row */
244       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
245       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
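      /* ld[i] counts the entries of row i that lie to the left of the diagonal block; it is stashed
         in the Mat_MPIAIJ below so that the MAT_REUSE_MATRIX path can split the incoming contiguous
         row values into the off-diagonal and diagonal blocks */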
246       jj   = 0;
247       for (i=0; i<m; i++) {
248         for (j=0; j<dlens[i]; j++) {
249           if (gmata->j[jj] < rstart) ld[i]++;
250           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
251           jj++;
252         }
253       }
254       /* send column indices to other processes */
255       for (i=1; i<size; i++) {
256         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
257         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
259       }
260 
261       /* send numerical values to other processes */
262       for (i=1; i<size; i++) {
263         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
264         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
265       }
266       gmataa = gmata->a;
267       gmataj = gmata->j;
268 
269     } else {
270       /* receive row lengths */
271       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
272       /* receive column indices */
273       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
274       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
276       /* determine the diagonal and off-diagonal entry counts for each row */
277       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
278       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
279       jj   = 0;
280       for (i=0; i<m; i++) {
281         for (j=0; j<dlens[i]; j++) {
282           if (gmataj[jj] < rstart) ld[i]++;
283           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
284           jj++;
285         }
286       }
287       /* receive numerical values */
288       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
289       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
290     }
291     /* set preallocation */
292     for (i=0; i<m; i++) {
293       dlens[i] -= olens[i];
294     }
295     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
296     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
297 
298     for (i=0; i<m; i++) {
299       dlens[i] += olens[i];
300     }
301     cnt = 0;
302     for (i=0; i<m; i++) {
303       row  = rstart + i;
304       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
305       cnt += dlens[i];
306     }
307     if (rank) {
308       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
309     }
310     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
311     ierr = PetscFree(rowners);CHKERRQ(ierr);
312 
313     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
314 
315     *inmat = mat;
316   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
317     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
318     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
319     mat  = *inmat;
320     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
321     if (!rank) {
322       /* send numerical values to other processes */
323       gmata  = (Mat_SeqAIJ*) gmat->data;
324       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
325       gmataa = gmata->a;
326       for (i=1; i<size; i++) {
327         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
328         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
329       }
330       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
331     } else {
332       /* receive numerical values from process 0*/
333       nz   = Ad->nz + Ao->nz;
334       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
335       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
336     }
337     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
338     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
339     ad = Ad->a;
340     ao = Ao->a;
341     if (mat->rmap->n) {
342       i  = 0;
343       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
344       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
345     }
346     for (i=1; i<mat->rmap->n; i++) {
347       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
348       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
349     }
350     i--;
351     if (mat->rmap->n) {
352       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
353     }
354     if (rank) {
355       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
356     }
357   }
358   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 /*
364   Local utility routine that creates a mapping from the global column
365 number to the local number in the off-diagonal part of the local
366 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
367 a slightly higher hash table cost; without it, it is not scalable (each processor
368 has an order-N integer array, but access is fast).
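
  The value stored in the map is the local column index plus one, so that a lookup result of zero
  means "this global column does not appear in the off-diagonal part"; callers subtract one after
  the lookup.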
369 */
370 #undef __FUNCT__
371 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
372 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
373 {
374   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
375   PetscErrorCode ierr;
376   PetscInt       n = aij->B->cmap->n,i;
377 
378   PetscFunctionBegin;
379   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
380 #if defined(PETSC_USE_CTABLE)
381   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
382   for (i=0; i<n; i++) {
383     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
384   }
385 #else
386   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
387   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
388   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
389 #endif
390   PetscFunctionReturn(0);
391 }
392 
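/*
    The two macros below insert (or add) a single value into the local diagonal block A
    (MatSetValues_SeqAIJ_A_Private) or the off-diagonal block B (MatSetValues_SeqAIJ_B_Private):
    they search the row for the column, update it in place when found, and otherwise shift the
    later entries up and insert it, reallocating the row via MatSeqXAIJReallocateAIJ() if needed.
*/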
393 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
394 { \
395     if (col <= lastcol1)  low1 = 0;     \
396     else                 high1 = nrow1; \
397     lastcol1 = col;\
398     while (high1-low1 > 5) { \
399       t = (low1+high1)/2; \
400       if (rp1[t] > col) high1 = t; \
401       else              low1  = t; \
402     } \
403       for (_i=low1; _i<high1; _i++) { \
404         if (rp1[_i] > col) break; \
405         if (rp1[_i] == col) { \
406           if (addv == ADD_VALUES) ap1[_i] += value;   \
407           else                    ap1[_i] = value; \
408           goto a_noinsert; \
409         } \
410       }  \
411       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
412       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
413       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
414       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
415       N = nrow1++ - 1; a->nz++; high1++; \
416       /* shift up all the later entries in this row */ \
417       for (ii=N; ii>=_i; ii--) { \
418         rp1[ii+1] = rp1[ii]; \
419         ap1[ii+1] = ap1[ii]; \
420       } \
421       rp1[_i] = col;  \
422       ap1[_i] = value;  \
423       A->nonzerostate++;\
424       a_noinsert: ; \
425       ailen[row] = nrow1; \
426 }
427 
428 
429 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
430   { \
431     if (col <= lastcol2) low2 = 0;                        \
432     else high2 = nrow2;                                   \
433     lastcol2 = col;                                       \
434     while (high2-low2 > 5) {                              \
435       t = (low2+high2)/2;                                 \
436       if (rp2[t] > col) high2 = t;                        \
437       else             low2  = t;                         \
438     }                                                     \
439     for (_i=low2; _i<high2; _i++) {                       \
440       if (rp2[_i] > col) break;                           \
441       if (rp2[_i] == col) {                               \
442         if (addv == ADD_VALUES) ap2[_i] += value;         \
443         else                    ap2[_i] = value;          \
444         goto b_noinsert;                                  \
445       }                                                   \
446     }                                                     \
447     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
448     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
449     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
450     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
451     N = nrow2++ - 1; b->nz++; high2++;                    \
452     /* shift up all the later entries in this row */      \
453     for (ii=N; ii>=_i; ii--) {                            \
454       rp2[ii+1] = rp2[ii];                                \
455       ap2[ii+1] = ap2[ii];                                \
456     }                                                     \
457     rp2[_i] = col;                                        \
458     ap2[_i] = value;                                      \
459     B->nonzerostate++;                                    \
460     b_noinsert: ;                                         \
461     bilen[row] = nrow2;                                   \
462   }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
466 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
467 {
468   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
469   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
470   PetscErrorCode ierr;
471   PetscInt       l,*garray = mat->garray,diag;
472 
473   PetscFunctionBegin;
474   /* code only works for square matrices A */
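  /* v is assumed to hold the entire local row in ascending global column order: the off-diagonal
     entries left of the diagonal block, then the diagonal block entries, then the off-diagonal
     entries to the right of it */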
475 
476   /* find size of row to the left of the diagonal part */
477   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
478   row  = row - diag;
479   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
480     if (garray[b->j[b->i[row]+l]] > diag) break;
481   }
482   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
483 
484   /* diagonal part */
485   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
486 
487   /* right of diagonal part */
488   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 #undef __FUNCT__
493 #define __FUNCT__ "MatSetValues_MPIAIJ"
494 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
495 {
496   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
497   PetscScalar    value;
498   PetscErrorCode ierr;
499   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
500   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
501   PetscBool      roworiented = aij->roworiented;
502 
503   /* Some Variables required in the macro */
504   Mat        A                 = aij->A;
505   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
506   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
507   MatScalar  *aa               = a->a;
508   PetscBool  ignorezeroentries = a->ignorezeroentries;
509   Mat        B                 = aij->B;
510   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
511   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
512   MatScalar  *ba               = b->a;
513 
514   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
515   PetscInt  nonew;
516   MatScalar *ap1,*ap2;
517 
518   PetscFunctionBegin;
519   for (i=0; i<m; i++) {
520     if (im[i] < 0) continue;
521 #if defined(PETSC_USE_DEBUG)
522     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
523 #endif
524     if (im[i] >= rstart && im[i] < rend) {
525       row      = im[i] - rstart;
526       lastcol1 = -1;
527       rp1      = aj + ai[row];
528       ap1      = aa + ai[row];
529       rmax1    = aimax[row];
530       nrow1    = ailen[row];
531       low1     = 0;
532       high1    = nrow1;
533       lastcol2 = -1;
534       rp2      = bj + bi[row];
535       ap2      = ba + bi[row];
536       rmax2    = bimax[row];
537       nrow2    = bilen[row];
538       low2     = 0;
539       high2    = nrow2;
540 
541       for (j=0; j<n; j++) {
542         if (roworiented) value = v[i*n+j];
543         else             value = v[i+j*m];
544         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
545         if (in[j] >= cstart && in[j] < cend) {
546           col   = in[j] - cstart;
547           nonew = a->nonew;
548           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
549         } else if (in[j] < 0) continue;
550 #if defined(PETSC_USE_DEBUG)
551         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
552 #endif
553         else {
554           if (mat->was_assembled) {
555             if (!aij->colmap) {
556               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
557             }
558 #if defined(PETSC_USE_CTABLE)
559             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
560             col--;
561 #else
562             col = aij->colmap[in[j]] - 1;
563 #endif
564             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
565               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
566               col  =  in[j];
567               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
568               B     = aij->B;
569               b     = (Mat_SeqAIJ*)B->data;
570               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
571               rp2   = bj + bi[row];
572               ap2   = ba + bi[row];
573               rmax2 = bimax[row];
574               nrow2 = bilen[row];
575               low2  = 0;
576               high2 = nrow2;
577               bm    = aij->B->rmap->n;
578               ba    = b->a;
579             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
580           } else col = in[j];
581           nonew = b->nonew;
582           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
583         }
584       }
585     } else {
586       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
587       if (!aij->donotstash) {
588         mat->assembled = PETSC_FALSE;
589         if (roworiented) {
590           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
591         } else {
592           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
593         }
594       }
595     }
596   }
597   PetscFunctionReturn(0);
598 }
599 
600 #undef __FUNCT__
601 #define __FUNCT__ "MatGetValues_MPIAIJ"
602 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
603 {
604   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
605   PetscErrorCode ierr;
606   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
607   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
608 
609   PetscFunctionBegin;
610   for (i=0; i<m; i++) {
611     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
612     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
613     if (idxm[i] >= rstart && idxm[i] < rend) {
614       row = idxm[i] - rstart;
615       for (j=0; j<n; j++) {
616         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
617         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
618         if (idxn[j] >= cstart && idxn[j] < cend) {
619           col  = idxn[j] - cstart;
620           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
621         } else {
622           if (!aij->colmap) {
623             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
624           }
625 #if defined(PETSC_USE_CTABLE)
626           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
627           col--;
628 #else
629           col = aij->colmap[idxn[j]] - 1;
630 #endif
631           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
632           else {
633             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
634           }
635         }
636       }
637     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
638   }
639   PetscFunctionReturn(0);
640 }
641 
642 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
643 
644 #undef __FUNCT__
645 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
646 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
647 {
648   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
649   PetscErrorCode ierr;
650   PetscInt       nstash,reallocs;
651   InsertMode     addv;
652 
653   PetscFunctionBegin;
654   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
655 
656   /* make sure all processors are either in INSERTMODE or ADDMODE */
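  /* (NOT_SET_VALUES is 0 while INSERT_VALUES and ADD_VALUES occupy different bits, so the bitwise
     OR below equals ADD_VALUES|INSERT_VALUES exactly when some ranks inserted and others added) */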
657   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
658   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
659   mat->insertmode = addv; /* in case this processor had no cache */
660 
661   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
662   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
663   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
669 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
670 {
671   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
672   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
673   PetscErrorCode ierr;
674   PetscMPIInt    n;
675   PetscInt       i,j,rstart,ncols,flg;
676   PetscInt       *row,*col;
677   PetscBool      other_disassembled;
678   PetscScalar    *val;
679   InsertMode     addv = mat->insertmode;
680 
681   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
682 
683   PetscFunctionBegin;
684   if (!aij->donotstash && !mat->nooffprocentries) {
685     while (1) {
686       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
687       if (!flg) break;
688 
689       for (i=0; i<n; ) {
690         /* Now identify the consecutive vals belonging to the same row */
691         for (j=i,rstart=row[j]; j<n; j++) {
692           if (row[j] != rstart) break;
693         }
694         if (j < n) ncols = j-i;
695         else       ncols = n-i;
696         /* Now assemble all these values with a single function call */
697         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
698 
699         i = j;
700       }
701     }
702     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
703   }
704   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
705   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
706 
707   /* determine if any processor has disassembled, if so we must
708      also disassemble ourselves, in order that we may reassemble. */
709   /*
710      if the nonzero structure of submatrix B cannot change then we know that
711      no processor disassembled, so we can skip this step
712   */
713   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
714     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
715     if (mat->was_assembled && !other_disassembled) {
716       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
717     }
718   }
719   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
720     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
721   }
722   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
723   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
724   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
725 
726   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
727 
728   aij->rowvalues = 0;
729 
730   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
731   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
732 
733   /* if no new nonzero locations are allowed in the matrix then only set the matrix nonzero state the first time through */
734   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
735     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
736     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
737   }
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
743 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
744 {
745   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
746   PetscErrorCode ierr;
747 
748   PetscFunctionBegin;
749   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
750   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
751   PetscFunctionReturn(0);
752 }
753 
754 #undef __FUNCT__
755 #define __FUNCT__ "MatZeroRows_MPIAIJ"
756 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
757 {
758   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
759   PetscInt      *owners = A->rmap->range;
760   PetscInt       n      = A->rmap->n;
761   PetscSF        sf;
762   PetscInt      *lrows;
763   PetscSFNode   *rrows;
764   PetscInt       r, p = 0, len = 0;
765   PetscErrorCode ierr;
766 
767   PetscFunctionBegin;
768   /* Create SF where leaves are input rows and roots are owned rows */
769   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
770   for (r = 0; r < n; ++r) lrows[r] = -1;
771   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
772   for (r = 0; r < N; ++r) {
773     const PetscInt idx   = rows[r];
774     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
775     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
776       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
777     }
778     if (A->nooffproczerorows) {
779       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
780       lrows[len++] = idx - owners[p];
781     } else {
782       rrows[r].rank = p;
783       rrows[r].index = rows[r] - owners[p];
784     }
785   }
786   if (!A->nooffproczerorows) {
787     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
788     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
789     /* Collect flags for rows to be zeroed */
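    /* lrows was initialized to -1, so after the logical-OR reduction it stays negative for local
       rows that no process flagged and becomes nonnegative for rows that are to be zeroed */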
790     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
791     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
792     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
793     /* Compress and put in row numbers */
794     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
795   }
796   /* fix right hand side if needed */
797   if (x && b) {
798     const PetscScalar *xx;
799     PetscScalar       *bb;
800 
801     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
802     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
803     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
804     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
805     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
806   }
807   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
808   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
810     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
811   } else if (diag != 0.0) {
812     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
813     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
814     for (r = 0; r < len; ++r) {
815       const PetscInt row = lrows[r] + A->rmap->rstart;
816       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
817     }
818     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820   } else {
821     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
822   }
823   ierr = PetscFree(lrows);CHKERRQ(ierr);
824 
825   /* only change matrix nonzero state if pattern was allowed to be changed */
826   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
827     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
828     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
829   }
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
835 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
836 {
837   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
838   PetscErrorCode    ierr;
839   PetscMPIInt       n = A->rmap->n;
840   PetscInt          i,j,r,m,p = 0,len = 0;
841   PetscInt          *lrows,*owners = A->rmap->range;
842   PetscSFNode       *rrows;
843   PetscSF           sf;
844   const PetscScalar *xx;
845   PetscScalar       *bb,*mask;
846   Vec               xmask,lmask;
847   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
848   const PetscInt    *aj, *ii,*ridx;
849   PetscScalar       *aa;
850 
851   PetscFunctionBegin;
852   /* Create SF where leaves are input rows and roots are owned rows */
853   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
854   for (r = 0; r < n; ++r) lrows[r] = -1;
855   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
856   for (r = 0; r < N; ++r) {
857     const PetscInt idx   = rows[r];
858     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
859     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
860       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
861     }
862     rrows[r].rank  = p;
863     rrows[r].index = rows[r] - owners[p];
864   }
865   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
866   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
867   /* Collect flags for rows to be zeroed */
868   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
869   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
870   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
871   /* Compress and put in row numbers */
872   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
873   /* zero diagonal part of matrix */
874   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
875   /* handle off diagonal part of matrix */
876   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
877   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
878   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
879   for (i=0; i<len; i++) bb[lrows[i]] = 1;
880   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
881   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
884   if (x) {
885     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
888     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
889   }
890   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
891   /* remove zeroed rows of off diagonal matrix */
892   ii = aij->i;
893   for (i=0; i<len; i++) {
894     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
895   }
896   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
897   if (aij->compressedrow.use) {
898     m    = aij->compressedrow.nrows;
899     ii   = aij->compressedrow.i;
900     ridx = aij->compressedrow.rindex;
901     for (i=0; i<m; i++) {
902       n  = ii[i+1] - ii[i];
903       aj = aij->j + ii[i];
904       aa = aij->a + ii[i];
905 
906       for (j=0; j<n; j++) {
907         if (PetscAbsScalar(mask[*aj])) {
908           if (b) bb[*ridx] -= *aa*xx[*aj];
909           *aa = 0.0;
910         }
911         aa++;
912         aj++;
913       }
914       ridx++;
915     }
916   } else { /* do not use compressed row format */
917     m = l->B->rmap->n;
918     for (i=0; i<m; i++) {
919       n  = ii[i+1] - ii[i];
920       aj = aij->j + ii[i];
921       aa = aij->a + ii[i];
922       for (j=0; j<n; j++) {
923         if (PetscAbsScalar(mask[*aj])) {
924           if (b) bb[i] -= *aa*xx[*aj];
925           *aa = 0.0;
926         }
927         aa++;
928         aj++;
929       }
930     }
931   }
932   if (x) {
933     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
934     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
935   }
936   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
937   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
938   ierr = PetscFree(lrows);CHKERRQ(ierr);
939 
940   /* only change matrix nonzero state if pattern was allowed to be changed */
941   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
942     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
943     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
944   }
945   PetscFunctionReturn(0);
946 }
947 
948 #undef __FUNCT__
949 #define __FUNCT__ "MatMult_MPIAIJ"
950 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
951 {
952   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
953   PetscErrorCode ierr;
954   PetscInt       nt;
955 
956   PetscFunctionBegin;
957   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
958   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
959   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
961   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
968 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
969 {
970   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatMultAdd_MPIAIJ"
980 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
981 {
982   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999   PetscBool      merged;
1000 
1001   PetscFunctionBegin;
1002   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
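  /* when the scatter's begin/end communication is merged, the local transpose product must be
     formed before VecScatterBegin() so that the remote contributions added during the Begin()
     are not overwritten */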
1003   /* do nondiagonal part */
1004   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1005   if (!merged) {
1006     /* send it on its way */
1007     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1008     /* do local part */
1009     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1010     /* receive remote parts: note this assumes the values are not actually */
1011   /* added into yy until the VecScatterEnd() on the next line */
1012     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1013   } else {
1014     /* do local part */
1015     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1016     /* send it on its way */
1017     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1018     /* values actually were received in the Begin() but we need to call this nop */
1019     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1026 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1027 {
1028   MPI_Comm       comm;
1029   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1030   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1031   IS             Me,Notme;
1032   PetscErrorCode ierr;
1033   PetscInt       M,N,first,last,*notme,i;
1034   PetscMPIInt    size;
1035 
1036   PetscFunctionBegin;
1037   /* Easy test: symmetric diagonal block */
1038   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1039   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1040   if (!*f) PetscFunctionReturn(0);
1041   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1043   if (size == 1) PetscFunctionReturn(0);
1044 
1045   /* Hard test: off-diagonal block. This takes two MatGetSubMatrices() calls. */
1046   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1047   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1049   for (i=0; i<first; i++) notme[i] = i;
1050   for (i=last; i<M; i++) notme[i-last+first] = i;
1051   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1052   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1053   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1054   Aoff = Aoffs[0];
1055   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1056   Boff = Boffs[0];
1057   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1059   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1061   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1062   ierr = PetscFree(notme);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   /* do nondiagonal part */
1075   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1076   /* send it on its way */
1077   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1078   /* do local part */
1079   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1080   /* receive remote parts */
1081   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 /*
1086   This only works correctly for square matrices where the subblock A->A is the
1087    diagonal block
1088 */
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1091 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1092 {
1093   PetscErrorCode ierr;
1094   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1095 
1096   PetscFunctionBegin;
1097   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1098   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1099   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatScale_MPIAIJ"
1105 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1112   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatDestroy_MPIAIJ"
1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1119 {
1120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124 #if defined(PETSC_USE_LOG)
1125   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1126 #endif
1127   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1128   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1130   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1131 #if defined(PETSC_USE_CTABLE)
1132   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1133 #else
1134   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1135 #endif
1136   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1137   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1138   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1139   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1140   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1141   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1142 
1143   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1152 #if defined(PETSC_HAVE_ELEMENTAL)
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1154 #endif
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1160 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1161 {
1162   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1163   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1164   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1165   PetscErrorCode ierr;
1166   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1167   int            fd;
1168   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1169   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1170   PetscScalar    *column_values;
1171   PetscInt       message_count,flowcontrolcount;
1172   FILE           *file;
1173 
1174   PetscFunctionBegin;
1175   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1176   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1177   nz   = A->nz + B->nz;
1178   if (!rank) {
1179     header[0] = MAT_FILE_CLASSID;
1180     header[1] = mat->rmap->N;
1181     header[2] = mat->cmap->N;
1182 
1183     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1185     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     /* get largest number of rows any processor has */
1187     rlen  = mat->rmap->n;
1188     range = mat->rmap->range;
1189     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1190   } else {
1191     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1192     rlen = mat->rmap->n;
1193   }
1194 
1195   /* load up the local row counts */
1196   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1197   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1198 
1199   /* store the row lengths to the file */
1200   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1201   if (!rank) {
1202     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1203     for (i=1; i<size; i++) {
1204       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1205       rlen = range[i+1] - range[i];
1206       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1207       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1208     }
1209     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1210   } else {
1211     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1212     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1213     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1214   }
1215   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1216 
1217   /* load up the local column indices */
1218   nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros held by any one process */
1219   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1220   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1221   cnt   = 0;
1222   for (i=0; i<mat->rmap->n; i++) {
1223     for (j=B->i[i]; j<B->i[i+1]; j++) {
1224       if ((col = garray[B->j[j]]) > cstart) break;
1225       column_indices[cnt++] = col;
1226     }
1227     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1228     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1229   }
1230   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1231 
1232   /* store the column indices to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     MPI_Status status;
1236     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1240       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1241       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1242       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1243     }
1244     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1245   } else {
1246     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1247     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1249     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1250   }
1251   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1252 
1253   /* load up the local column values */
1254   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1255   cnt  = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if (garray[B->j[j]] > cstart) break;
1259       column_values[cnt++] = B->a[j];
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1262     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column values to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1275       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_values);CHKERRQ(ierr);
1286 
1287   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1288   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1289   PetscFunctionReturn(0);
1290 }
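
/*
   Editorial sketch (not part of the original source): the binary writer above is normally
   reached through the generic MatView() on a binary viewer. The file name "A.dat" and the
   helper name below are illustrative only.
*/
#if 0
static PetscErrorCode SketchMatDumpBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);              /* on more than one process this ends up in MatView_MPIAIJ_Binary() */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif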
1291 
1292 #include <petscdraw.h>
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1295 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1296 {
1297   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1298   PetscErrorCode    ierr;
1299   PetscMPIInt       rank = aij->rank,size = aij->size;
1300   PetscBool         isdraw,iascii,isbinary;
1301   PetscViewer       sviewer;
1302   PetscViewerFormat format;
1303 
1304   PetscFunctionBegin;
1305   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1306   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1307   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1308   if (iascii) {
1309     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1310     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1311       MatInfo   info;
1312       PetscBool inodes;
1313 
1314       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1315       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1317       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1318       if (!inodes) {
1319         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1320                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1321       } else {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1324       }
1325       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1326       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1327       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1329       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1332       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1333       PetscFunctionReturn(0);
1334     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1335       PetscInt inodecount,inodelimit,*inodes;
1336       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1337       if (inodes) {
1338         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1339       } else {
1340         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1341       }
1342       PetscFunctionReturn(0);
1343     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1344       PetscFunctionReturn(0);
1345     }
1346   } else if (isbinary) {
1347     if (size == 1) {
1348       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1349       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1350     } else {
1351       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1352     }
1353     PetscFunctionReturn(0);
1354   } else if (isdraw) {
1355     PetscDraw draw;
1356     PetscBool isnull;
1357     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1358     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1359   }
1360 
1361   {
1362     /* assemble the entire matrix onto the first process */
1363     Mat        A;
1364     Mat_SeqAIJ *Aloc;
1365     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1366     MatScalar  *a;
1367     const char *matname;
1368 
1369     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1370     if (!rank) {
1371       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1372     } else {
1373       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1374     }
1375     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1376     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1377     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1378     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1379     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1380 
1381     /* copy over the A part */
1382     Aloc = (Mat_SeqAIJ*)aij->A->data;
1383     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1384     row  = mat->rmap->rstart;
1385     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1386     for (i=0; i<m; i++) {
1387       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1388       row++;
1389       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1390     }
1391     aj = Aloc->j;
1392     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1393 
1394     /* copy over the B part */
1395     Aloc = (Mat_SeqAIJ*)aij->B->data;
1396     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1397     row  = mat->rmap->rstart;
1398     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1399     ct   = cols;
1400     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1401     for (i=0; i<m; i++) {
1402       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1403       row++;
1404       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1405     }
1406     ierr = PetscFree(ct);CHKERRQ(ierr);
1407     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1408     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1409     /*
1410        Every process has to participate in drawing the matrix, since the graphics waits are
1411        synchronized across all processes that share the PetscDraw object
1412     */
1413     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1414     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1415     if (!rank) {
1416       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1417       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1420     ierr = MatDestroy(&A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
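
/*
   Editorial sketch (not part of the original source): the ASCII branch above prints per-process
   nonzero and I-node statistics when the viewer format is PETSC_VIEWER_ASCII_INFO_DETAIL. A caller
   would typically set the format before viewing; the helper name below is illustrative.
*/
#if 0
static PetscErrorCode SketchMatViewInfoDetail(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer = PETSC_VIEWER_STDOUT_WORLD;

  PetscFunctionBegin;
  ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);  /* takes the PETSC_VIEWER_ASCII_INFO_DETAIL branch above */
  PetscFunctionReturn(0);
}
#endif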
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatView_MPIAIJ"
1427 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1428 {
1429   PetscErrorCode ierr;
1430   PetscBool      iascii,isdraw,issocket,isbinary;
1431 
1432   PetscFunctionBegin;
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1437   if (iascii || isdraw || isbinary || issocket) {
1438     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1439   }
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatSOR_MPIAIJ"
1445 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1446 {
1447   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1448   PetscErrorCode ierr;
1449   Vec            bb1 = 0;
1450   PetscBool      hasop;
1451 
1452   PetscFunctionBegin;
1453   if (flag == SOR_APPLY_UPPER) {
1454     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455     PetscFunctionReturn(0);
1456   }
1457 
1458   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* a work vector bb1 is needed unless this is a single sweep with a zero initial guess and no Eisenstat trick */
1459     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1460   }
1461 
1462   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1463     if (flag & SOR_ZERO_INITIAL_GUESS) {
1464       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1465       its--;
1466     }
1467 
1468     while (its--) {
1469       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1470       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471 
1472       /* update rhs: bb1 = bb - B*x */
1473       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1474       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1475 
1476       /* local sweep */
1477       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1478     }
1479   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1480     if (flag & SOR_ZERO_INITIAL_GUESS) {
1481       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1482       its--;
1483     }
1484     while (its--) {
1485       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487 
1488       /* update rhs: bb1 = bb - B*x */
1489       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1490       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1491 
1492       /* local sweep */
1493       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1494     }
1495   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_EISENSTAT) {
1512     Vec xx1;
1513 
1514     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1515     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1516 
1517     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519     if (!mat->diag) {
1520       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1521       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1522     }
1523     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1524     if (hasop) {
1525       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1526     } else {
1527       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1528     }
1529     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1530 
1531     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1532 
1533     /* local sweep */
1534     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1535     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1536     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1537   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1538 
1539   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
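
/*
   Editorial sketch (not part of the original source): MatSOR_MPIAIJ() supports only the
   SOR_LOCAL_* (process-local) sweeps and the Eisenstat trick. A direct call performing two
   local symmetric sweeps with a zero initial guess might look as follows (names illustrative).
*/
#if 0
static PetscErrorCode SketchLocalSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* omega = 1.0, no diagonal shift, its = 2 outer iterations, lits = 1 local iteration each */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif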
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatPermute_MPIAIJ"
1545 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1546 {
1547   Mat            aA,aB,Aperm;
1548   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1549   PetscScalar    *aa,*ba;
1550   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1551   PetscSF        rowsf,sf;
1552   IS             parcolp = NULL;
1553   PetscBool      done;
1554   PetscErrorCode ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1558   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1559   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1560   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1561 
1562   /* Invert row permutation to find out where my rows should go */
1563   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1564   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1565   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1566   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1567   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1568   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1569 
1570   /* Invert column permutation to find out where my columns should go */
1571   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1572   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1573   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1574   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1575   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1576   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1577   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1578 
1579   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1580   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1581   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1582 
1583   /* Find out where my gcols should go */
1584   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1585   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1586   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1587   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1588   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1589   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1590   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1591   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1592 
1593   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1594   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1595   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1596   for (i=0; i<m; i++) {
1597     PetscInt row = rdest[i],rowner;
1598     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1599     for (j=ai[i]; j<ai[i+1]; j++) {
1600       PetscInt cowner,col = cdest[aj[j]];
1601       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1602       if (rowner == cowner) dnnz[i]++;
1603       else onnz[i]++;
1604     }
1605     for (j=bi[i]; j<bi[i+1]; j++) {
1606       PetscInt cowner,col = gcdest[bj[j]];
1607       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1608       if (rowner == cowner) dnnz[i]++;
1609       else onnz[i]++;
1610     }
1611   }
1612   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1613   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1614   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1615   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1616   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1617 
1618   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1619   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1620   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1621   for (i=0; i<m; i++) {
1622     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1623     PetscInt j0,rowlen;
1624     rowlen = ai[i+1] - ai[i];
1625     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so set the values in batches */
1626       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1627       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1628     }
1629     rowlen = bi[i+1] - bi[i];
1630     for (j0=j=0; j<rowlen; j0=j) {
1631       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1632       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1633     }
1634   }
1635   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1636   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1637   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1638   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1639   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1640   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1641   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1642   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1643   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1644   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1645   *B = Aperm;
1646   PetscFunctionReturn(0);
1647 }
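
/*
   Editorial sketch (not part of the original source): MatPermute_MPIAIJ() is reached through
   MatPermute() with parallel row and column permutation index sets. The stride permutation
   below (the identity) is only an illustration of building suitable ISs.
*/
#if 0
static PetscErrorCode SketchPermuteIdentity(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       m,n,rstart,cstart;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif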
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1651 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1652 {
1653   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1654   Mat            A    = mat->A,B = mat->B;
1655   PetscErrorCode ierr;
1656   PetscReal      isend[5],irecv[5];
1657 
1658   PetscFunctionBegin;
1659   info->block_size = 1.0;
1660   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1661 
1662   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1663   isend[3] = info->memory;  isend[4] = info->mallocs;
1664 
1665   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1666 
1667   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1668   isend[3] += info->memory;  isend[4] += info->mallocs;
1669   if (flag == MAT_LOCAL) {
1670     info->nz_used      = isend[0];
1671     info->nz_allocated = isend[1];
1672     info->nz_unneeded  = isend[2];
1673     info->memory       = isend[3];
1674     info->mallocs      = isend[4];
1675   } else if (flag == MAT_GLOBAL_MAX) {
1676     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1677 
1678     info->nz_used      = irecv[0];
1679     info->nz_allocated = irecv[1];
1680     info->nz_unneeded  = irecv[2];
1681     info->memory       = irecv[3];
1682     info->mallocs      = irecv[4];
1683   } else if (flag == MAT_GLOBAL_SUM) {
1684     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1685 
1686     info->nz_used      = irecv[0];
1687     info->nz_allocated = irecv[1];
1688     info->nz_unneeded  = irecv[2];
1689     info->memory       = irecv[3];
1690     info->mallocs      = irecv[4];
1691   }
1692   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1693   info->fill_ratio_needed = 0;
1694   info->factor_mallocs    = 0;
1695   PetscFunctionReturn(0);
1696 }
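
/*
   Editorial sketch (not part of the original source): the local/max/sum reduction logic above is
   exercised through MatGetInfo(); for example, summing the nonzero counts over all processes.
*/
#if 0
static PetscErrorCode SketchPrintGlobalNZ(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"global nonzeros used: %g\n",(double)info.nz_used);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif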
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatSetOption_MPIAIJ"
1700 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1701 {
1702   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1703   PetscErrorCode ierr;
1704 
1705   PetscFunctionBegin;
1706   switch (op) {
1707   case MAT_NEW_NONZERO_LOCATIONS:
1708   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1709   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1710   case MAT_KEEP_NONZERO_PATTERN:
1711   case MAT_NEW_NONZERO_LOCATION_ERR:
1712   case MAT_USE_INODES:
1713   case MAT_IGNORE_ZERO_ENTRIES:
1714     MatCheckPreallocated(A,1);
1715     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1716     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1717     break;
1718   case MAT_ROW_ORIENTED:
1719     a->roworiented = flg;
1720 
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_NEW_DIAGONALS:
1725     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1726     break;
1727   case MAT_IGNORE_OFF_PROC_ENTRIES:
1728     a->donotstash = flg;
1729     break;
1730   case MAT_SPD:
1731     A->spd_set = PETSC_TRUE;
1732     A->spd     = flg;
1733     if (flg) {
1734       A->symmetric                  = PETSC_TRUE;
1735       A->structurally_symmetric     = PETSC_TRUE;
1736       A->symmetric_set              = PETSC_TRUE;
1737       A->structurally_symmetric_set = PETSC_TRUE;
1738     }
1739     break;
1740   case MAT_SYMMETRIC:
1741     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_STRUCTURALLY_SYMMETRIC:
1744     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1745     break;
1746   case MAT_HERMITIAN:
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     break;
1749   case MAT_SYMMETRY_ETERNAL:
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     break;
1752   default:
1753     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1754   }
1755   PetscFunctionReturn(0);
1756 }
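
/*
   Editorial sketch (not part of the original source): most options above are simply forwarded to
   the sequential diagonal (a->A) and off-diagonal (a->B) blocks; MAT_SPD additionally marks the
   parallel matrix as symmetric and structurally symmetric, for example:
*/
#if 0
static PetscErrorCode SketchMarkSPD(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); /* also sets the symmetry flags in MatSetOption_MPIAIJ() */
  PetscFunctionReturn(0);
}
#endif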
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatGetRow_MPIAIJ"
1760 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1761 {
1762   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1763   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1764   PetscErrorCode ierr;
1765   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1766   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1767   PetscInt       *cmap,*idx_p;
1768 
1769   PetscFunctionBegin;
1770   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1771   mat->getrowactive = PETSC_TRUE;
1772 
1773   if (!mat->rowvalues && (idx || v)) {
1774     /*
1775         allocate enough space to hold information from the longest row.
1776     */
1777     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1778     PetscInt   max = 1,tmp;
1779     for (i=0; i<matin->rmap->n; i++) {
1780       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1781       if (max < tmp) max = tmp;
1782     }
1783     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1784   }
1785 
1786   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1787   lrow = row - rstart;
1788 
1789   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1790   if (!v)   {pvA = 0; pvB = 0;}
1791   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1792   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1793   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1794   nztot = nzA + nzB;
1795 
1796   cmap = mat->garray;
1797   if (v  || idx) {
1798     if (nztot) {
1799       /* Sort by increasing column numbers, assuming A and B already sorted */
1800       PetscInt imark = -1;
1801       if (v) {
1802         *v = v_p = mat->rowvalues;
1803         for (i=0; i<nzB; i++) {
1804           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1805           else break;
1806         }
1807         imark = i;
1808         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1809         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1810       }
1811       if (idx) {
1812         *idx = idx_p = mat->rowindices;
1813         if (imark > -1) {
1814           for (i=0; i<imark; i++) {
1815             idx_p[i] = cmap[cworkB[i]];
1816           }
1817         } else {
1818           for (i=0; i<nzB; i++) {
1819             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1820             else break;
1821           }
1822           imark = i;
1823         }
1824         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1825         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1826       }
1827     } else {
1828       if (idx) *idx = 0;
1829       if (v)   *v   = 0;
1830     }
1831   }
1832   *nz  = nztot;
1833   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1834   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
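
/*
   Editorial sketch (not part of the original source): MatGetRow_MPIAIJ() merges the diagonal and
   off-diagonal parts of one locally owned row into globally numbered, column-sorted output; a
   typical traversal of the local rows looks as follows.
*/
#if 0
static PetscErrorCode SketchWalkLocalRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {                       /* only locally owned rows may be requested */
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... use ncols, cols[], vals[] ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif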
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1840 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1841 {
1842   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1843 
1844   PetscFunctionBegin;
1845   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1846   aij->getrowactive = PETSC_FALSE;
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatNorm_MPIAIJ"
1852 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1853 {
1854   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1855   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1856   PetscErrorCode ierr;
1857   PetscInt       i,j,cstart = mat->cmap->rstart;
1858   PetscReal      sum = 0.0;
1859   MatScalar      *v;
1860 
1861   PetscFunctionBegin;
1862   if (aij->size == 1) {
1863     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1864   } else {
1865     if (type == NORM_FROBENIUS) {
1866       v = amat->a;
1867       for (i=0; i<amat->nz; i++) {
1868         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1869       }
1870       v = bmat->a;
1871       for (i=0; i<bmat->nz; i++) {
1872         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1873       }
1874       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1875       *norm = PetscSqrtReal(*norm);
1876     } else if (type == NORM_1) { /* max column norm */
1877       PetscReal *tmp,*tmp2;
1878       PetscInt  *jj,*garray = aij->garray;
1879       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1880       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1881       *norm = 0.0;
1882       v     = amat->a; jj = amat->j;
1883       for (j=0; j<amat->nz; j++) {
1884         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1885       }
1886       v = bmat->a; jj = bmat->j;
1887       for (j=0; j<bmat->nz; j++) {
1888         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1889       }
1890       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1891       for (j=0; j<mat->cmap->N; j++) {
1892         if (tmp2[j] > *norm) *norm = tmp2[j];
1893       }
1894       ierr = PetscFree(tmp);CHKERRQ(ierr);
1895       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1896     } else if (type == NORM_INFINITY) { /* max row norm */
1897       PetscReal ntemp = 0.0;
1898       for (j=0; j<aij->A->rmap->n; j++) {
1899         v   = amat->a + amat->i[j];
1900         sum = 0.0;
1901         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1902           sum += PetscAbsScalar(*v); v++;
1903         }
1904         v = bmat->a + bmat->i[j];
1905         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1906           sum += PetscAbsScalar(*v); v++;
1907         }
1908         if (sum > ntemp) ntemp = sum;
1909       }
1910       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1911     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1912   }
1913   PetscFunctionReturn(0);
1914 }
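
/*
   Editorial sketch (not part of the original source): the Frobenius, one- and infinity-norm
   branches above are reached through MatNorm(); the two-norm is not supported for MPIAIJ.
*/
#if 0
static PetscErrorCode SketchNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      frob,one,inf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&frob);CHKERRQ(ierr);  /* square root of the global sum of |a_ij|^2 */
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);           /* maximum column sum */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr);    /* maximum row sum */
  PetscFunctionReturn(0);
}
#endif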
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatTranspose_MPIAIJ"
1918 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1919 {
1920   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1921   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1922   PetscErrorCode ierr;
1923   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1924   PetscInt       cstart = A->cmap->rstart,ncol;
1925   Mat            B;
1926   MatScalar      *array;
1927 
1928   PetscFunctionBegin;
1929   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1930 
1931   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1932   ai = Aloc->i; aj = Aloc->j;
1933   bi = Bloc->i; bj = Bloc->j;
1934   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1935     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1936     PetscSFNode          *oloc;
1937     PETSC_UNUSED PetscSF sf;
1938 
1939     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1940     /* compute d_nnz for preallocation */
1941     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1942     for (i=0; i<ai[ma]; i++) {
1943       d_nnz[aj[i]]++;
1944       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1945     }
1946     /* compute local off-diagonal contributions */
1947     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1948     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1949     /* map those to global */
1950     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1951     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1952     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1953     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1954     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1955     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1956     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1957 
1958     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1959     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1960     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1961     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1962     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1963     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1964   } else {
1965     B    = *matout;
1966     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1967     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1968   }
1969 
1970   /* copy over the A part */
1971   array = Aloc->a;
1972   row   = A->rmap->rstart;
1973   for (i=0; i<ma; i++) {
1974     ncol = ai[i+1]-ai[i];
1975     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1976     row++;
1977     array += ncol; aj += ncol;
1978   }
1979   aj = Aloc->j;
1980   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1981 
1982   /* copy over the B part */
1983   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1984   array = Bloc->a;
1985   row   = A->rmap->rstart;
1986   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1987   cols_tmp = cols;
1988   for (i=0; i<mb; i++) {
1989     ncol = bi[i+1]-bi[i];
1990     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1991     row++;
1992     array += ncol; cols_tmp += ncol;
1993   }
1994   ierr = PetscFree(cols);CHKERRQ(ierr);
1995 
1996   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1997   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1998   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1999     *matout = B;
2000   } else {
2001     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2002   }
2003   PetscFunctionReturn(0);
2004 }
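
/*
   Editorial sketch (not part of the original source): the transpose routine above is reached
   through MatTranspose(); MAT_INITIAL_MATRIX creates a new matrix whose preallocation is
   computed from the transposed nonzero pattern, while in-place reuse requires a square matrix.
*/
#if 0
static PetscErrorCode SketchTranspose(Mat A,Mat *At)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif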
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2008 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2009 {
2010   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2011   Mat            a    = aij->A,b = aij->B;
2012   PetscErrorCode ierr;
2013   PetscInt       s1,s2,s3;
2014 
2015   PetscFunctionBegin;
2016   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2017   if (rr) {
2018     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2019     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2020     /* Overlap communication with computation. */
2021     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2022   }
2023   if (ll) {
2024     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2025     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2026     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2027   }
2028   /* scale the diagonal block */
2029   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2030 
2031   if (rr) {
2032     /* Do a scatter end and then right scale the off-diagonal block */
2033     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2034     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2035   }
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2041 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2042 {
2043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2044   PetscErrorCode ierr;
2045 
2046   PetscFunctionBegin;
2047   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatEqual_MPIAIJ"
2053 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2054 {
2055   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2056   Mat            a,b,c,d;
2057   PetscBool      flg;
2058   PetscErrorCode ierr;
2059 
2060   PetscFunctionBegin;
2061   a = matA->A; b = matA->B;
2062   c = matB->A; d = matB->B;
2063 
2064   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2065   if (flg) {
2066     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2067   }
2068   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatCopy_MPIAIJ"
2074 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2075 {
2076   PetscErrorCode ierr;
2077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2078   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2079 
2080   PetscFunctionBegin;
2081   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2082   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2083     /* Because of the column compression in the off-process part of the matrix a->B,
2084        the number of columns in a->B and b->B may differ, hence we cannot call
2085        MatCopy() directly on the two parts. If need be, a copy more efficient than
2086        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2087        and then copying the submatrices. */
2088     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2089   } else {
2090     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2091     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2092   }
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #undef __FUNCT__
2097 #define __FUNCT__ "MatSetUp_MPIAIJ"
2098 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2099 {
2100   PetscErrorCode ierr;
2101 
2102   PetscFunctionBegin;
2103   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 /*
2108    Computes the number of nonzeros per row needed for preallocation when X and Y
2109    have different nonzero structure.
2110 */
2111 #undef __FUNCT__
2112 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2113 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2114 {
2115   PetscInt       i,j,k,nzx,nzy;
2116 
2117   PetscFunctionBegin;
2118   /* Set the number of nonzeros in the new matrix */
2119   for (i=0; i<m; i++) {
2120     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2121     nzx = xi[i+1] - xi[i];
2122     nzy = yi[i+1] - yi[i];
2123     nnz[i] = 0;
2124     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2125       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2126       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2127       nnz[i]++;
2128     }
2129     for (; k<nzy; k++) nnz[i]++;
2130   }
2131   PetscFunctionReturn(0);
2132 }
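
/*
   Editorial note (not part of the original source): the loop above counts, per row, the size of
   the union of two sorted global column lists. For example, with X row columns {1,4,7} and
   Y row columns {2,4,9}, the union is {1,2,4,7,9}, so nnz for that row is 5.
*/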
2133 
2134 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2137 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2138 {
2139   PetscErrorCode ierr;
2140   PetscInt       m = Y->rmap->N;
2141   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2142   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2143 
2144   PetscFunctionBegin;
2145   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatAXPY_MPIAIJ"
2151 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2152 {
2153   PetscErrorCode ierr;
2154   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2155   PetscBLASInt   bnz,one=1;
2156   Mat_SeqAIJ     *x,*y;
2157 
2158   PetscFunctionBegin;
2159   if (str == SAME_NONZERO_PATTERN) {
2160     PetscScalar alpha = a;
2161     x    = (Mat_SeqAIJ*)xx->A->data;
2162     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2163     y    = (Mat_SeqAIJ*)yy->A->data;
2164     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2165     x    = (Mat_SeqAIJ*)xx->B->data;
2166     y    = (Mat_SeqAIJ*)yy->B->data;
2167     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2168     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2169     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2170   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2171     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2172   } else {
2173     Mat      B;
2174     PetscInt *nnz_d,*nnz_o;
2175     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2176     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2177     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2178     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2179     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2180     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2181     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2182     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2183     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2184     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2185     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2186     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2187     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2188     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2189   }
2190   PetscFunctionReturn(0);
2191 }
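
/*
   Editorial sketch (not part of the original source): MatAXPY_MPIAIJ() takes the fast BLAS path
   only for SAME_NONZERO_PATTERN; otherwise a new matrix with the merged pattern is preallocated
   and header-replaced into Y, for example:
*/
#if 0
static PetscErrorCode SketchAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* Y <- Y + 2*X, patterns may differ */
  PetscFunctionReturn(0);
}
#endif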
2192 
2193 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatConjugate_MPIAIJ"
2197 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2198 {
2199 #if defined(PETSC_USE_COMPLEX)
2200   PetscErrorCode ierr;
2201   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2202 
2203   PetscFunctionBegin;
2204   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2205   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2206 #else
2207   PetscFunctionBegin;
2208 #endif
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatRealPart_MPIAIJ"
2214 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2215 {
2216   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2217   PetscErrorCode ierr;
2218 
2219   PetscFunctionBegin;
2220   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2221   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 #undef __FUNCT__
2226 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2227 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2228 {
2229   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2230   PetscErrorCode ierr;
2231 
2232   PetscFunctionBegin;
2233   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2234   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #if defined(PETSC_HAVE_PBGL)
2239 
2240 #include <boost/parallel/mpi/bsp_process_group.hpp>
2241 #include <boost/graph/distributed/ilu_default_graph.hpp>
2242 #include <boost/graph/distributed/ilu_0_block.hpp>
2243 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2244 #include <boost/graph/distributed/petsc/interface.hpp>
2245 #include <boost/multi_array.hpp>
2246 #include <boost/parallel/distributed_property_map.hpp>
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2250 /*
2251   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2252 */
2253 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2254 {
2255   namespace petsc = boost::distributed::petsc;
2256 
2257   namespace graph_dist = boost::graph::distributed;
2258   using boost::graph::distributed::ilu_default::process_group_type;
2259   using boost::graph::ilu_permuted;
2260 
2261   PetscBool      row_identity, col_identity;
2262   PetscContainer c;
2263   PetscInt       m, n, M, N;
2264   PetscErrorCode ierr;
2265 
2266   PetscFunctionBegin;
2267   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2268   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2269   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2270   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2271 
2272   process_group_type pg;
2273   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2274   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2275   lgraph_type& level_graph = *lgraph_p;
2276   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2277 
2278   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2279   ilu_permuted(level_graph);
2280 
2281   /* put together the new matrix */
2282   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2283   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2284   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2285   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2286   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2287   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2288   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2289   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2290 
2291   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2292   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2293   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2294   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 #undef __FUNCT__
2299 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2300 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2301 {
2302   PetscFunctionBegin;
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatSolve_MPIAIJ"
2308 /*
2309   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2310 */
2311 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2312 {
2313   namespace graph_dist = boost::graph::distributed;
2314 
2315   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2316   lgraph_type    *lgraph_p;
2317   PetscContainer c;
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2322   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2323   ierr = VecCopy(b, x);CHKERRQ(ierr);
2324 
2325   PetscScalar *array_x;
2326   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2327   PetscInt sx;
2328   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2329 
2330   PetscScalar *array_b;
2331   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2332   PetscInt sb;
2333   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2334 
2335   lgraph_type& level_graph = *lgraph_p;
2336   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2337 
2338   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2339   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2340   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2341 
2342   typedef boost::iterator_property_map<array_ref_type::iterator,
2343                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2344   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2345   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2346 
2347   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2348   PetscFunctionReturn(0);
2349 }
2350 #endif
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2354 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2355 {
2356   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2357   PetscErrorCode ierr;
2358   PetscInt       i,*idxb = 0;
2359   PetscScalar    *va,*vb;
2360   Vec            vtmp;
2361 
2362   PetscFunctionBegin;
2363   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2364   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2365   if (idx) {
2366     for (i=0; i<A->rmap->n; i++) {
2367       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2368     }
2369   }
2370 
2371   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2372   if (idx) {
2373     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2374   }
2375   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2376   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2377 
2378   for (i=0; i<A->rmap->n; i++) {
2379     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2380       va[i] = vb[i];
2381       if (idx) idx[i] = a->garray[idxb[i]];
2382     }
2383   }
2384 
2385   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2386   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2387   ierr = PetscFree(idxb);CHKERRQ(ierr);
2388   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
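
/*
   Editorial sketch (not part of the original source): the row-max routine above combines the
   per-row maxima of the diagonal and off-diagonal blocks; a caller supplies a vector that
   conforms to the row layout of the matrix and, optionally, an index array of the same length.
*/
#if 0
static PetscErrorCode SketchRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;
  PetscInt       m,*idx;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);        /* left vector: conforms to the rows of A */
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);            /* optional: column index of each row maximum */
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif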
2391 
2392 #undef __FUNCT__
2393 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2394 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2395 {
2396   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2397   PetscErrorCode ierr;
2398   PetscInt       i,*idxb = 0;
2399   PetscScalar    *va,*vb;
2400   Vec            vtmp;
2401 
2402   PetscFunctionBegin;
2403   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2404   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2405   if (idx) {
2406     for (i=0; i<A->rmap->n; i++) {
2407       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2408     }
2409   }
2410 
2411   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2412   if (idx) {
2413     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2414   }
2415   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2416   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2417 
2418   for (i=0; i<A->rmap->n; i++) {
2419     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2420       va[i] = vb[i];
2421       if (idx) idx[i] = a->garray[idxb[i]];
2422     }
2423   }
2424 
2425   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2426   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2427   ierr = PetscFree(idxb);CHKERRQ(ierr);
2428   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2434 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2435 {
2436   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2437   PetscInt       n      = A->rmap->n;
2438   PetscInt       cstart = A->cmap->rstart;
2439   PetscInt       *cmap  = mat->garray;
2440   PetscInt       *diagIdx, *offdiagIdx;
2441   Vec            diagV, offdiagV;
2442   PetscScalar    *a, *diagA, *offdiagA;
2443   PetscInt       r;
2444   PetscErrorCode ierr;
2445 
2446   PetscFunctionBegin;
2447   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2448   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2449   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2450   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2451   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2452   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2453   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2454   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2455   for (r = 0; r < n; ++r) {
2456     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2457       a[r]   = diagA[r];
2458       idx[r] = cstart + diagIdx[r];
2459     } else {
2460       a[r]   = offdiagA[r];
2461       idx[r] = cmap[offdiagIdx[r]];
2462     }
2463   }
2464   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2465   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2466   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2467   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2468   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2469   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2475 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2476 {
2477   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2478   PetscInt       n      = A->rmap->n;
2479   PetscInt       cstart = A->cmap->rstart;
2480   PetscInt       *cmap  = mat->garray;
2481   PetscInt       *diagIdx, *offdiagIdx;
2482   Vec            diagV, offdiagV;
2483   PetscScalar    *a, *diagA, *offdiagA;
2484   PetscInt       r;
2485   PetscErrorCode ierr;
2486 
2487   PetscFunctionBegin;
2488   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2489   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2490   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2491   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2492   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2493   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2494   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2495   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2496   for (r = 0; r < n; ++r) {
2497     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2498       a[r]   = diagA[r];
2499       idx[r] = cstart + diagIdx[r];
2500     } else {
2501       a[r]   = offdiagA[r];
2502       idx[r] = cmap[offdiagIdx[r]];
2503     }
2504   }
2505   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2509   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2510   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 #undef __FUNCT__
2515 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2516 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2517 {
2518   PetscErrorCode ierr;
2519   Mat            *dummy;
2520 
2521   PetscFunctionBegin;
2522   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2523   *newmat = *dummy;
2524   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 #undef __FUNCT__
2529 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2530 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2531 {
2532   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2533   PetscErrorCode ierr;
2534 
2535   PetscFunctionBegin;
2536   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2542 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2543 {
2544   PetscErrorCode ierr;
2545   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2546 
2547   PetscFunctionBegin;
2548   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2549   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2550   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2551   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 /* -------------------------------------------------------------------*/
2556 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2557                                        MatGetRow_MPIAIJ,
2558                                        MatRestoreRow_MPIAIJ,
2559                                        MatMult_MPIAIJ,
2560                                 /* 4*/ MatMultAdd_MPIAIJ,
2561                                        MatMultTranspose_MPIAIJ,
2562                                        MatMultTransposeAdd_MPIAIJ,
2563 #if defined(PETSC_HAVE_PBGL)
2564                                        MatSolve_MPIAIJ,
2565 #else
2566                                        0,
2567 #endif
2568                                        0,
2569                                        0,
2570                                 /*10*/ 0,
2571                                        0,
2572                                        0,
2573                                        MatSOR_MPIAIJ,
2574                                        MatTranspose_MPIAIJ,
2575                                 /*15*/ MatGetInfo_MPIAIJ,
2576                                        MatEqual_MPIAIJ,
2577                                        MatGetDiagonal_MPIAIJ,
2578                                        MatDiagonalScale_MPIAIJ,
2579                                        MatNorm_MPIAIJ,
2580                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2581                                        MatAssemblyEnd_MPIAIJ,
2582                                        MatSetOption_MPIAIJ,
2583                                        MatZeroEntries_MPIAIJ,
2584                                 /*24*/ MatZeroRows_MPIAIJ,
2585                                        0,
2586 #if defined(PETSC_HAVE_PBGL)
2587                                        0,
2588 #else
2589                                        0,
2590 #endif
2591                                        0,
2592                                        0,
2593                                 /*29*/ MatSetUp_MPIAIJ,
2594 #if defined(PETSC_HAVE_PBGL)
2595                                        0,
2596 #else
2597                                        0,
2598 #endif
2599                                        0,
2600                                        0,
2601                                        0,
2602                                 /*34*/ MatDuplicate_MPIAIJ,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                 /*39*/ MatAXPY_MPIAIJ,
2608                                        MatGetSubMatrices_MPIAIJ,
2609                                        MatIncreaseOverlap_MPIAIJ,
2610                                        MatGetValues_MPIAIJ,
2611                                        MatCopy_MPIAIJ,
2612                                 /*44*/ MatGetRowMax_MPIAIJ,
2613                                        MatScale_MPIAIJ,
2614                                        0,
2615                                        MatDiagonalSet_MPIAIJ,
2616                                        MatZeroRowsColumns_MPIAIJ,
2617                                 /*49*/ MatSetRandom_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2623                                        0,
2624                                        MatSetUnfactored_MPIAIJ,
2625                                        MatPermute_MPIAIJ,
2626                                        0,
2627                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2628                                        MatDestroy_MPIAIJ,
2629                                        MatView_MPIAIJ,
2630                                        0,
2631                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2632                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2633                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2638                                        MatGetRowMinAbs_MPIAIJ,
2639                                        0,
2640                                        MatSetColoring_MPIAIJ,
2641                                        0,
2642                                        MatSetValuesAdifor_MPIAIJ,
2643                                 /*75*/ MatFDColoringApply_AIJ,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        MatFindZeroDiagonals_MPIAIJ,
2648                                 /*80*/ 0,
2649                                        0,
2650                                        0,
2651                                 /*83*/ MatLoad_MPIAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2658                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2659                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2660                                        MatPtAP_MPIAIJ_MPIAIJ,
2661                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2662                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*99*/ 0,
2668                                        0,
2669                                        0,
2670                                        MatConjugate_MPIAIJ,
2671                                        0,
2672                                 /*104*/MatSetValuesRow_MPIAIJ,
2673                                        MatRealPart_MPIAIJ,
2674                                        MatImaginaryPart_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                 /*109*/0,
2678                                        0,
2679                                        MatGetRowMin_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                 /*119*/0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                        MatGetMultiProcBlock_MPIAIJ,
2692                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2693                                        MatGetColumnNorms_MPIAIJ,
2694                                        MatInvertBlockDiagonal_MPIAIJ,
2695                                        0,
2696                                        MatGetSubMatricesParallel_MPIAIJ,
2697                                 /*129*/0,
2698                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2699                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2700                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2701                                        0,
2702                                 /*134*/0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                 /*139*/0,
2708                                        0,
2709                                        0,
2710                                        MatFDColoringSetUp_MPIXAIJ,
2711                                        0,
2712                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2713 };
2714 
2715 /* ----------------------------------------------------------------------------------------*/
2716 
2717 #undef __FUNCT__
2718 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2719 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2720 {
2721   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2722   PetscErrorCode ierr;
2723 
2724   PetscFunctionBegin;
2725   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2726   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2732 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2733 {
2734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2735   PetscErrorCode ierr;
2736 
2737   PetscFunctionBegin;
2738   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2739   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2745 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2746 {
2747   Mat_MPIAIJ     *b;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2752   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2753   b = (Mat_MPIAIJ*)B->data;
2754 
2755   if (!B->preallocated) {
2756     /* Explicitly create 2 MATSEQAIJ matrices. */
2757     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2758     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2759     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2760     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2761     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2762     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2763     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2764     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2765     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2766     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2767   }
2768 
2769   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2770   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2771   B->preallocated = PETSC_TRUE;
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 #undef __FUNCT__
2776 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2777 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2778 {
2779   Mat            mat;
2780   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2781   PetscErrorCode ierr;
2782 
2783   PetscFunctionBegin;
2784   *newmat = 0;
2785   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2786   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2787   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2788   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2789   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2790   a       = (Mat_MPIAIJ*)mat->data;
2791 
2792   mat->factortype   = matin->factortype;
2793   mat->assembled    = PETSC_TRUE;
2794   mat->insertmode   = NOT_SET_VALUES;
2795   mat->preallocated = PETSC_TRUE;
2796 
2797   a->size         = oldmat->size;
2798   a->rank         = oldmat->rank;
2799   a->donotstash   = oldmat->donotstash;
2800   a->roworiented  = oldmat->roworiented;
2801   a->rowindices   = 0;
2802   a->rowvalues    = 0;
2803   a->getrowactive = PETSC_FALSE;
2804 
2805   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2807 
2808   if (oldmat->colmap) {
2809 #if defined(PETSC_USE_CTABLE)
2810     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2811 #else
2812     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2813     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2814     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2815 #endif
2816   } else a->colmap = 0;
2817   if (oldmat->garray) {
2818     PetscInt len;
2819     len  = oldmat->B->cmap->n;
2820     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2821     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2822     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2823   } else a->garray = 0;
2824 
2825   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2826   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2827   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2828   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2829   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2830   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2831   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2832   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2833   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2834   *newmat = mat;
2835   PetscFunctionReturn(0);
2836 }
2837 
2838 
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatLoad_MPIAIJ"
2842 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2843 {
2844   PetscScalar    *vals,*svals;
2845   MPI_Comm       comm;
2846   PetscErrorCode ierr;
2847   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2848   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2849   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2850   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2851   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2852   int            fd;
2853   PetscInt       bs = newMat->rmap->bs;
2854 
2855   PetscFunctionBegin;
2856   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2857   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2858   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2859   if (!rank) {
2860     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2861     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2862     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2863   }
2864 
2865   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2866   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2867   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2868   if (bs < 0) bs = 1;
2869 
2870   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2871 
2872   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2873   M    = header[1]; N = header[2];
2874   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2875   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2876   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2877 
2878   /* If global sizes are set, check if they are consistent with that given in the file */
2879   if (sizesset) {
2880     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2881   }
2882   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2883   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2884 
2885   /* determine ownership of all (block) rows */
2886   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2887   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2888   else m = newMat->rmap->n; /* Set by user */
2889 
2890   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2891   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2892 
2893   /* First process needs enough room for process with most rows */
2894   if (!rank) {
2895     mmax = rowners[1];
2896     for (i=2; i<=size; i++) {
2897       mmax = PetscMax(mmax, rowners[i]);
2898     }
2899   } else mmax = -1;             /* unused, but compilers complain */
2900 
2901   rowners[0] = 0;
2902   for (i=2; i<=size; i++) {
2903     rowners[i] += rowners[i-1];
2904   }
2905   rstart = rowners[rank];
2906   rend   = rowners[rank+1];
2907 
2908   /* distribute row lengths to all processors */
2909   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2910   if (!rank) {
2911     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2912     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2913     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2914     for (j=0; j<m; j++) {
2915       procsnz[0] += ourlens[j];
2916     }
2917     for (i=1; i<size; i++) {
2918       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2919       /* calculate the number of nonzeros on each processor */
2920       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2921         procsnz[i] += rowlengths[j];
2922       }
2923       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2924     }
2925     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2926   } else {
2927     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2928   }
2929 
2930   if (!rank) {
2931     /* determine max buffer needed and allocate it */
2932     maxnz = 0;
2933     for (i=0; i<size; i++) {
2934       maxnz = PetscMax(maxnz,procsnz[i]);
2935     }
2936     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2937 
2938     /* read in my part of the matrix column indices  */
2939     nz   = procsnz[0];
2940     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2941     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2942 
2943     /* read in everyone else's parts and ship them off */
2944     for (i=1; i<size; i++) {
2945       nz   = procsnz[i];
2946       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2947       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2948     }
2949     ierr = PetscFree(cols);CHKERRQ(ierr);
2950   } else {
2951     /* determine buffer space needed for message */
2952     nz = 0;
2953     for (i=0; i<m; i++) {
2954       nz += ourlens[i];
2955     }
2956     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2957 
2958     /* receive message of column indices*/
2959     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2960   }
2961 
2962   /* determine column ownership if matrix is not square */
2963   if (N != M) {
2964     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2965     else n = newMat->cmap->n;
2966     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2967     cstart = cend - n;
2968   } else {
2969     cstart = rstart;
2970     cend   = rend;
2971     n      = cend - cstart;
2972   }
2973 
2974   /* loop over local rows, determining number of off diagonal entries */
2975   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2976   jj   = 0;
2977   for (i=0; i<m; i++) {
2978     for (j=0; j<ourlens[i]; j++) {
2979       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2980       jj++;
2981     }
2982   }
2983 
2984   for (i=0; i<m; i++) {
2985     ourlens[i] -= offlens[i];
2986   }
2987   if (!sizesset) {
2988     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2989   }
2990 
2991   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2992 
2993   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2994 
2995   for (i=0; i<m; i++) {
2996     ourlens[i] += offlens[i];
2997   }
2998 
2999   if (!rank) {
3000     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3001 
3002     /* read in my part of the matrix numerical values  */
3003     nz   = procsnz[0];
3004     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3005 
3006     /* insert into matrix */
3007     jj      = rstart;
3008     smycols = mycols;
3009     svals   = vals;
3010     for (i=0; i<m; i++) {
3011       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3012       smycols += ourlens[i];
3013       svals   += ourlens[i];
3014       jj++;
3015     }
3016 
3017     /* read in other processors and ship out */
3018     for (i=1; i<size; i++) {
3019       nz   = procsnz[i];
3020       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3021       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3022     }
3023     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3024   } else {
3025     /* receive numeric values */
3026     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3027 
3028     /* receive message of values*/
3029     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3030 
3031     /* insert into matrix */
3032     jj      = rstart;
3033     smycols = mycols;
3034     svals   = vals;
3035     for (i=0; i<m; i++) {
3036       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3037       smycols += ourlens[i];
3038       svals   += ourlens[i];
3039       jj++;
3040     }
3041   }
3042   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3043   ierr = PetscFree(vals);CHKERRQ(ierr);
3044   ierr = PetscFree(mycols);CHKERRQ(ierr);
3045   ierr = PetscFree(rowners);CHKERRQ(ierr);
3046   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3047   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 #undef __FUNCT__
3052 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3053 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3054 {
3055   PetscErrorCode ierr;
3056   IS             iscol_local;
3057   PetscInt       csize;
3058 
3059   PetscFunctionBegin;
3060   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3061   if (call == MAT_REUSE_MATRIX) {
3062     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3063     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3064   } else {
3065     PetscInt cbs;
3066     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3067     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3068     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3069   }
3070   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3071   if (call == MAT_INITIAL_MATRIX) {
3072     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3073     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3074   }
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3079 #undef __FUNCT__
3080 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3081 /*
3082     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3083   process, and then the final result obtained by concatenating those local matrices.
3084   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3085 
3086   Note: This requires a sequential iscol with all indices.
3087 */
3088 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3089 {
3090   PetscErrorCode ierr;
3091   PetscMPIInt    rank,size;
3092   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3093   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3094   PetscBool      allcolumns, colflag;
3095   Mat            M,Mreuse;
3096   MatScalar      *vwork,*aa;
3097   MPI_Comm       comm;
3098   Mat_SeqAIJ     *aij;
3099 
3100   PetscFunctionBegin;
3101   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3102   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3103   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3104 
3105   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3106   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3107   if (colflag && ncol == mat->cmap->N) {
3108     allcolumns = PETSC_TRUE;
3109   } else {
3110     allcolumns = PETSC_FALSE;
3111   }
3112   if (call ==  MAT_REUSE_MATRIX) {
3113     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3114     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3115     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3116   } else {
3117     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3118   }
3119 
3120   /*
3121       m - number of local rows
3122       n - number of columns (same on all processors)
3123       rstart - first row in new global matrix generated
3124   */
3125   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3126   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3127   if (call == MAT_INITIAL_MATRIX) {
3128     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3129     ii  = aij->i;
3130     jj  = aij->j;
3131 
3132     /*
3133         Determine the number of non-zeros in the diagonal and off-diagonal
3134         portions of the matrix in order to do correct preallocation
3135     */
3136 
3137     /* first get start and end of "diagonal" columns */
3138     if (csize == PETSC_DECIDE) {
3139       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3140       if (mglobal == n) { /* square matrix */
3141         nlocal = m;
3142       } else {
3143         nlocal = n/size + ((n % size) > rank);
3144       }
3145     } else {
3146       nlocal = csize;
3147     }
3148     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3149     rstart = rend - nlocal;
3150     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3151 
3152     /* next, compute all the lengths */
3153     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3154     olens = dlens + m;
3155     for (i=0; i<m; i++) {
3156       jend = ii[i+1] - ii[i];
3157       olen = 0;
3158       dlen = 0;
3159       for (j=0; j<jend; j++) {
3160         if (*jj < rstart || *jj >= rend) olen++;
3161         else dlen++;
3162         jj++;
3163       }
3164       olens[i] = olen;
3165       dlens[i] = dlen;
3166     }
3167     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3168     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3169     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3170     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3171     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3172     ierr = PetscFree(dlens);CHKERRQ(ierr);
3173   } else {
3174     PetscInt ml,nl;
3175 
3176     M    = *newmat;
3177     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3178     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3179     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3180     /*
3181          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3182        rather than the slower MatSetValues().
3183     */
3184     M->was_assembled = PETSC_TRUE;
3185     M->assembled     = PETSC_FALSE;
3186   }
3187   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3188   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3189   ii   = aij->i;
3190   jj   = aij->j;
3191   aa   = aij->a;
3192   for (i=0; i<m; i++) {
3193     row   = rstart + i;
3194     nz    = ii[i+1] - ii[i];
3195     cwork = jj;     jj += nz;
3196     vwork = aa;     aa += nz;
3197     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3198   }
3199 
3200   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3202   *newmat = M;
3203 
3204   /* save submatrix used in processor for next request */
3205   if (call ==  MAT_INITIAL_MATRIX) {
3206     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3207     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3208   }
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3214 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3215 {
3216   PetscInt       m,cstart, cend,j,nnz,i,d;
3217   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3218   const PetscInt *JJ;
3219   PetscScalar    *values;
3220   PetscErrorCode ierr;
3221 
3222   PetscFunctionBegin;
3223   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3224 
3225   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3226   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3227   m      = B->rmap->n;
3228   cstart = B->cmap->rstart;
3229   cend   = B->cmap->rend;
3230   rstart = B->rmap->rstart;
3231 
3232   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3233 
3234 #if defined(PETSC_USE_DEBUG)
3235   for (i=0; i<m; i++) {
3236     nnz = Ii[i+1]- Ii[i];
3237     JJ  = J + Ii[i];
3238     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3239     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3240     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3241   }
3242 #endif
3243 
3244   for (i=0; i<m; i++) {
3245     nnz     = Ii[i+1]- Ii[i];
3246     JJ      = J + Ii[i];
3247     nnz_max = PetscMax(nnz_max,nnz);
3248     d       = 0;
3249     for (j=0; j<nnz; j++) {
3250       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3251     }
3252     d_nnz[i] = d;
3253     o_nnz[i] = nnz - d;
3254   }
3255   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3256   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3257 
3258   if (v) values = (PetscScalar*)v;
3259   else {
3260     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3261   }
3262 
3263   for (i=0; i<m; i++) {
3264     ii   = i + rstart;
3265     nnz  = Ii[i+1]- Ii[i];
3266     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3267   }
3268   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3269   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3270 
3271   if (!v) {
3272     ierr = PetscFree(values);CHKERRQ(ierr);
3273   }
3274   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3275   PetscFunctionReturn(0);
3276 }
3277 
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3280 /*@
3281    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3282    (the default parallel PETSc format).
3283 
3284    Collective on MPI_Comm
3285 
3286    Input Parameters:
3287 +  B - the matrix
3288 .  i - the indices into j for the start of each local row (starts with zero)
3289 .  j - the column indices for each local row (starts with zero)
3290 -  v - optional values in the matrix
3291 
3292    Level: developer
3293 
3294    Notes:
3295        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3296      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3297      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3298 
3299        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3300 
3301        The format used for the sparse matrix input is equivalent to a
3302     row-major ordering, i.e., for the following matrix the expected input data is
3303     as shown:
3304 
3305         1 0 0
3306         2 0 3     P0
3307        -------
3308         4 5 6     P1
3309 
3310      Process0 [P0]: rows_owned=[0,1]
3311         i =  {0,1,3}  [size = nrow+1  = 2+1]
3312         j =  {0,0,2}  [size = nz = 3]
3313         v =  {1,2,3}  [size = nz = 3]
3314 
3315      Process1 [P1]: rows_owned=[2]
3316         i =  {0,3}    [size = nrow+1  = 1+1]
3317         j =  {0,1,2}  [size = nz = 3]
3318         v =  {4,5,6}  [size = nz = 3]
3319 
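     As a minimal sketch (assuming the 3x3 example above and a matrix B that has already
   been created with MatCreate() and MatSetType(B,MATMPIAIJ)), process 0 would pass its
   local CSR data as below; process 1 makes the same collective call with i = {0,3},
   j = {0,1,2}, v = {4,5,6}:

.vb
     PetscInt    i[] = {0,1,3};   /* start of each of the 2 local rows within j[]/v[] */
     PetscInt    j[] = {0,0,2};   /* global column index of each local nonzero        */
     PetscScalar v[] = {1,2,3};   /* values matching j[]                              */

     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);  /* 2 local rows on process 0 */
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
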
3320 .keywords: matrix, aij, compressed row, sparse, parallel
3321 
3322 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3323           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3324 @*/
3325 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3326 {
3327   PetscErrorCode ierr;
3328 
3329   PetscFunctionBegin;
3330   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3331   PetscFunctionReturn(0);
3332 }
3333 
3334 #undef __FUNCT__
3335 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3336 /*@C
3337    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3338    (the default parallel PETSc format).  For good matrix assembly performance
3339    the user should preallocate the matrix storage by setting the parameters
3340    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3341    performance can be increased by more than a factor of 50.
3342 
3343    Collective on MPI_Comm
3344 
3345    Input Parameters:
3346 +  B - the matrix
3347 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3348            (same value is used for all local rows)
3349 .  d_nnz - array containing the number of nonzeros in the various rows of the
3350            DIAGONAL portion of the local submatrix (possibly different for each row)
3351            or NULL, if d_nz is used to specify the nonzero structure.
3352            The size of this array is equal to the number of local rows, i.e., 'm'.
3353            For matrices that will be factored, you must leave room for (and set)
3354            the diagonal entry even if it is zero.
3355 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3356            submatrix (same value is used for all local rows).
3357 -  o_nnz - array containing the number of nonzeros in the various rows of the
3358            OFF-DIAGONAL portion of the local submatrix (possibly different for
3359            each row) or NULL, if o_nz is used to specify the nonzero
3360            structure. The size of this array is equal to the number
3361            of local rows, i.e., 'm'.
3362 
3363    If the *_nnz parameter is given then the *_nz parameter is ignored
3364 
3365    The AIJ format (also called the Yale sparse matrix format or
3366    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3367    storage.  The stored row and column indices begin with zero.
3368    See Users-Manual: ch_mat for details.
3369 
3370    The parallel matrix is partitioned such that the first m0 rows belong to
3371    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3372    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3373 
3374    The DIAGONAL portion of the local submatrix of a processor can be defined
3375    as the submatrix obtained by extracting the part corresponding to
3376    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3377    first row that belongs to the processor, r2 is the last row belonging to
3378    this processor, and c1-c2 is the range of indices of the local part of a
3379    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3380    common case of a square matrix, the row and column ranges are the same and
3381    the DIAGONAL part is also square. The remaining portion of the local
3382    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3383 
3384    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3385 
3386    You can call MatGetInfo() to get information on how effective the preallocation was,
3387    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3388    You can also run with the option -info and look for messages containing the string
3389    malloc to see whether additional memory allocation was needed.
3390 
3391    Example usage:
3392 
3393    Consider the following 8x8 matrix with 34 nonzero values that is
3394    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3395    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3396    as follows:
3397 
3398 .vb
3399             1  2  0  |  0  3  0  |  0  4
3400     Proc0   0  5  6  |  7  0  0  |  8  0
3401             9  0 10  | 11  0  0  | 12  0
3402     -------------------------------------
3403            13  0 14  | 15 16 17  |  0  0
3404     Proc1   0 18  0  | 19 20 21  |  0  0
3405             0  0  0  | 22 23  0  | 24  0
3406     -------------------------------------
3407     Proc2  25 26 27  |  0  0 28  | 29  0
3408            30  0  0  | 31 32 33  |  0 34
3409 .ve
3410 
3411    This can be represented as a collection of submatrices as:
3412 
3413 .vb
3414       A B C
3415       D E F
3416       G H I
3417 .ve
3418 
3419    Where the submatrices A,B,C are owned by proc0, D,E,F are
3420    owned by proc1, G,H,I are owned by proc2.
3421 
3422    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3423    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3424    The 'M','N' parameters are 8,8, and have the same values on all procs.
3425 
3426    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3427    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3428    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3429    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3430    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3431    matrix and [DF] as another SeqAIJ matrix.
3432 
3433    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3434    allocated for every row of the local DIAGONAL submatrix, and o_nz
3435    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3436    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3437    local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3438    In this case, the values of d_nz,o_nz are:
3439 .vb
3440      proc0 : dnz = 2, o_nz = 2
3441      proc1 : dnz = 3, o_nz = 2
3442      proc2 : dnz = 1, o_nz = 4
3443 .ve
3444    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3445    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3446    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3447    34 values.
3448 
3449    When the d_nnz, o_nnz parameters are specified, the storage is specified
3450    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3451    In the above case the values for d_nnz,o_nnz are:
3452 .vb
3453      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3454      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3455      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3456 .ve
3457    Here the space allocated is the sum of all the above values, i.e., 34, and
3458    hence the preallocation is exact.
3459 
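   As a minimal sketch (assuming a matrix B for the 8x8 example above that has already been
   created with MatCreate(), MatSetSizes(B,3,3,8,8), and MatSetType(B,MATMPIAIJ) on proc0),
   either variant can be used; the call is collective, so proc1 and proc2 pass their own counts:

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* exact per-row counts on proc0 */

     /* either a single estimate per row ... */
     ierr = MatMPIAIJSetPreallocation(B,2,NULL,2,NULL);CHKERRQ(ierr);
     /* ... or exact per-row counts */
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
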
3460    Level: intermediate
3461 
3462 .keywords: matrix, aij, compressed row, sparse, parallel
3463 
3464 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3465           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3466 @*/
3467 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3468 {
3469   PetscErrorCode ierr;
3470 
3471   PetscFunctionBegin;
3472   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3473   PetscValidType(B,1);
3474   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3475   PetscFunctionReturn(0);
3476 }
3477 
3478 #undef __FUNCT__
3479 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3480 /*@
3481      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3482          rows in standard CSR format.
3483 
3484    Collective on MPI_Comm
3485 
3486    Input Parameters:
3487 +  comm - MPI communicator
3488 .  m - number of local rows (Cannot be PETSC_DECIDE)
3489 .  n - This value should be the same as the local size used in creating the
3490        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3491        calculated if N is given) For square matrices n is almost always m.
3492 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3493 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3494 .   i - row indices
3495 .   j - column indices
3496 -   a - matrix values
3497 
3498    Output Parameter:
3499 .   mat - the matrix
3500 
3501    Level: intermediate
3502 
3503    Notes:
3504        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3505      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3506      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3507 
3508        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3509 
3510        The format used for the sparse matrix input is equivalent to a
3511     row-major ordering, i.e., for the following matrix the expected input data is
3512     as shown:
3513 
3514         1 0 0
3515         2 0 3     P0
3516        -------
3517         4 5 6     P1
3518 
3519      Process0 [P0]: rows_owned=[0,1]
3520         i =  {0,1,3}  [size = nrow+1  = 2+1]
3521         j =  {0,0,2}  [size = nz = 3]
3522         v =  {1,2,3}  [size = nz = 3]
3523 
3524      Process1 [P1]: rows_owned=[2]
3525         i =  {0,3}    [size = nrow+1  = 1+1]
3526         j =  {0,1,2}  [size = nz = 3]
3527         v =  {4,5,6}  [size = nz = 3]
3528 
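     As a minimal sketch (assuming the 3x3 example above on a communicator comm with two
   processes), process 0 creates the matrix directly from its CSR arrays as below; process 1
   makes the same collective call with m = 1 and its own i, j, a arrays:

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve
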
3529 .keywords: matrix, aij, compressed row, sparse, parallel
3530 
3531 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3532           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3533 @*/
3534 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3535 {
3536   PetscErrorCode ierr;
3537 
3538   PetscFunctionBegin;
3539   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3540   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3541   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3542   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3543   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3544   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3545   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3546   PetscFunctionReturn(0);
3547 }
3548 
3549 #undef __FUNCT__
3550 #define __FUNCT__ "MatCreateAIJ"
3551 /*@C
3552    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3553    (the default parallel PETSc format).  For good matrix assembly performance
3554    the user should preallocate the matrix storage by setting the parameters
3555    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3556    performance can be increased by more than a factor of 50.
3557 
3558    Collective on MPI_Comm
3559 
3560    Input Parameters:
3561 +  comm - MPI communicator
3562 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3563            This value should be the same as the local size used in creating the
3564            y vector for the matrix-vector product y = Ax.
3565 .  n - This value should be the same as the local size used in creating the
3566        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3567        calculated if N is given) For square matrices n is almost always m.
3568 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3569 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3570 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3571            (same value is used for all local rows)
3572 .  d_nnz - array containing the number of nonzeros in the various rows of the
3573            DIAGONAL portion of the local submatrix (possibly different for each row)
3574            or NULL, if d_nz is used to specify the nonzero structure.
3575            The size of this array is equal to the number of local rows, i.e., 'm'.
3576 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3577            submatrix (same value is used for all local rows).
3578 -  o_nnz - array containing the number of nonzeros in the various rows of the
3579            OFF-DIAGONAL portion of the local submatrix (possibly different for
3580            each row) or NULL, if o_nz is used to specify the nonzero
3581            structure. The size of this array is equal to the number
3582            of local rows, i.e., 'm'.
3583 
3584    Output Parameter:
3585 .  A - the matrix
3586 
3587    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3588    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3589    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3590 
3591    Notes:
3592    If the *_nnz parameter is given then the *_nz parameter is ignored
3593 
3594    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3595    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3596    storage requirements for this matrix.
3597 
3598    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3599    processor then it must be used on all processors that share the object for
3600    that argument.
3601 
3602    The user MUST specify either the local or global matrix dimensions
3603    (possibly both).
3604 
3605    The parallel matrix is partitioned across processors such that the
3606    first m0 rows belong to process 0, the next m1 rows belong to
3607    process 1, the next m2 rows belong to process 2, etc., where
3608    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3609    values corresponding to an [m x N] submatrix.
3610 
3611    The columns are logically partitioned with the n0 columns belonging
3612    to the 0th partition, the next n1 columns belonging to the next
3613    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3614 
3615    The DIAGONAL portion of the local submatrix on any given processor
3616    is the submatrix corresponding to the rows and columns m,n
3617    owned by the given processor, i.e., the diagonal matrix on
3618    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3619    etc. The remaining portion of the local submatrix [m x (N-n)]
3620    constitutes the OFF-DIAGONAL portion. The example below better
3621    illustrates this concept.
3622 
3623    For a square global matrix we define each processor's diagonal portion
3624    to be its local rows and the corresponding columns (a square submatrix);
3625    each processor's off-diagonal portion encompasses the remainder of the
3626    local matrix (a rectangular submatrix).
3627 
3628    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3629 
3630    When calling this routine with a single process communicator, a matrix of
3631    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3632    type of communicator, use the construction mechanism:
3633      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3634 
3635    By default, this format uses inodes (identical nodes) when possible.
3636    We search for consecutive rows with the same nonzero structure, thereby
3637    reusing matrix information to achieve increased efficiency.
3638 
3639    Options Database Keys:
3640 +  -mat_no_inode  - Do not use inodes
3641 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3642 -  -mat_aij_oneindex - Internally use indexing starting at 1
3643         rather than 0.  Note that when calling MatSetValues(),
3644         the user still MUST index entries starting at 0!
3645 
3646 
3647    Example usage:
3648 
3649    Consider the following 8x8 matrix with 34 nonzero values that is
3650    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3651    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3652    as follows:
3653 
3654 .vb
3655             1  2  0  |  0  3  0  |  0  4
3656     Proc0   0  5  6  |  7  0  0  |  8  0
3657             9  0 10  | 11  0  0  | 12  0
3658     -------------------------------------
3659            13  0 14  | 15 16 17  |  0  0
3660     Proc1   0 18  0  | 19 20 21  |  0  0
3661             0  0  0  | 22 23  0  | 24  0
3662     -------------------------------------
3663     Proc2  25 26 27  |  0  0 28  | 29  0
3664            30  0  0  | 31 32 33  |  0 34
3665 .ve
3666 
3667    This can be represented as a collection of submatrices as:
3668 
3669 .vb
3670       A B C
3671       D E F
3672       G H I
3673 .ve
3674 
3675    Where the submatrices A,B,C are owned by proc0, D,E,F are
3676    owned by proc1, G,H,I are owned by proc2.
3677 
3678    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3679    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3680    The 'M','N' parameters are 8,8, and have the same values on all procs.
3681 
3682    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3683    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3684    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3685    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3686    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3687    matrix and [DF] as another SeqAIJ matrix.
3688 
3689    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3690    allocated for every row of the local DIAGONAL submatrix, and o_nz
3691    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3692    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3693    local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3694    In this case, the values of d_nz,o_nz are:
3695 .vb
3696      proc0 : dnz = 2, o_nz = 2
3697      proc1 : dnz = 3, o_nz = 2
3698      proc2 : dnz = 1, o_nz = 4
3699 .ve
3700    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3701    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3702    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3703    34 values.
3704 
3705    When the d_nnz, o_nnz parameters are specified, the storage is specified
3706    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3707    In the above case the values for d_nnz,o_nnz are:
3708 .vb
3709      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3710      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3711      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3712 .ve
3713    Here the space allocated is the sum of all the above values, i.e., 34, and
3714    hence the preallocation is exact.
3715 
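   As a minimal sketch (assuming the 8x8 example above on a 3-process communicator comm),
   proc0 can create and preallocate the matrix in one call; proc1 and proc2 make the same
   collective call with their own local sizes and per-row counts:

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* proc0 per-row counts */
     Mat      A;

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
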
3716    Level: intermediate
3717 
3718 .keywords: matrix, aij, compressed row, sparse, parallel
3719 
3720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3721           MPIAIJ, MatCreateMPIAIJWithArrays()
3722 @*/
3723 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3724 {
3725   PetscErrorCode ierr;
3726   PetscMPIInt    size;
3727 
3728   PetscFunctionBegin;
3729   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3730   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3731   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3732   if (size > 1) {
3733     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3734     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3735   } else {
3736     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3737     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3738   }
3739   PetscFunctionReturn(0);
3740 }
3741 
3742 #undef __FUNCT__
3743 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3744 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3745 {
3746   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3747 
3748   PetscFunctionBegin;
3749   if (Ad)     *Ad     = a->A;
3750   if (Ao)     *Ao     = a->B;
3751   if (colmap) *colmap = a->garray;
3752   PetscFunctionReturn(0);
3753 }
3754 
3755 #undef __FUNCT__
3756 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3757 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3758 {
3759   PetscErrorCode ierr;
3760   PetscInt       i;
3761   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3762 
3763   PetscFunctionBegin;
3764   if (coloring->ctype == IS_COLORING_GLOBAL) {
3765     ISColoringValue *allcolors,*colors;
3766     ISColoring      ocoloring;
3767 
3768     /* set coloring for diagonal portion */
3769     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3770 
3771     /* set coloring for off-diagonal portion */
3772     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3773     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3774     for (i=0; i<a->B->cmap->n; i++) {
3775       colors[i] = allcolors[a->garray[i]];
3776     }
3777     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3778     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3779     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3780     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3781   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3782     ISColoringValue *colors;
3783     PetscInt        *larray;
3784     ISColoring      ocoloring;
3785 
3786     /* set coloring for diagonal portion */
3787     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3788     for (i=0; i<a->A->cmap->n; i++) {
3789       larray[i] = i + A->cmap->rstart;
3790     }
3791     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3792     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3793     for (i=0; i<a->A->cmap->n; i++) {
3794       colors[i] = coloring->colors[larray[i]];
3795     }
3796     ierr = PetscFree(larray);CHKERRQ(ierr);
3797     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3798     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3799     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3800 
3801     /* set coloring for off-diagonal portion */
3802     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3803     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3804     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3805     for (i=0; i<a->B->cmap->n; i++) {
3806       colors[i] = coloring->colors[larray[i]];
3807     }
3808     ierr = PetscFree(larray);CHKERRQ(ierr);
3809     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3810     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3811     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3812   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3813   PetscFunctionReturn(0);
3814 }
3815 
3816 #undef __FUNCT__
3817 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3818 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3819 {
3820   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3821   PetscErrorCode ierr;
3822 
3823   PetscFunctionBegin;
3824   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3825   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3826   PetscFunctionReturn(0);
3827 }
3828 
3829 #undef __FUNCT__
3830 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
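/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - concatenates (stacks by rows) the sequential matrices inmat
   held by the processes of comm into one parallel MPIAIJ matrix outmat; n is the local column size of
   outmat (or PETSC_DECIDE), and scall selects whether the symbolic phase is performed or only the
   numeric phase is redone.
*/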
3831 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3832 {
3833   PetscErrorCode ierr;
3834   PetscInt       m,N,i,rstart,nnz,Ii;
3835   PetscInt       *indx;
3836   PetscScalar    *values;
3837 
3838   PetscFunctionBegin;
3839   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3840   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3841     PetscInt       *dnz,*onz,sum,bs,cbs;
3842 
3843     if (n == PETSC_DECIDE) {
3844       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3845     }
3846     /* Check sum(n) = N */
3847     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3848     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3849 
3850     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3851     rstart -= m;
3852 
3853     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3854     for (i=0; i<m; i++) {
3855       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3856       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3857       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3858     }
3859 
3860     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3861     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3862     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3863     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3864     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3865     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3866     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3867   }
3868 
3869   /* numeric phase */
3870   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3871   for (i=0; i<m; i++) {
3872     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3873     Ii   = i + rstart;
3874     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3875     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3876   }
3877   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   PetscFunctionReturn(0);
3880 }
3881 
3882 #undef __FUNCT__
3883 #define __FUNCT__ "MatFileSplit"
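/*
   MatFileSplit - each process writes its local rows of A (as a sequential matrix over the full global
   column space) to the binary file <outfile>.<rank>.
*/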
3884 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3885 {
3886   PetscErrorCode    ierr;
3887   PetscMPIInt       rank;
3888   PetscInt          m,N,i,rstart,nnz;
3889   size_t            len;
3890   const PetscInt    *indx;
3891   PetscViewer       out;
3892   char              *name;
3893   Mat               B;
3894   const PetscScalar *values;
3895 
3896   PetscFunctionBegin;
3897   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3898   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3899   /* Should this be the type of the diagonal block of A? */
3900   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3901   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3902   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3903   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3904   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3905   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3906   for (i=0; i<m; i++) {
3907     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3908     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3909     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3910   }
3911   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3912   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3913 
3914   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3915   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3916   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3917   sprintf(name,"%s.%d",outfile,rank);
3918   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3919   ierr = PetscFree(name);CHKERRQ(ierr);
3920   ierr = MatView(B,out);CHKERRQ(ierr);
3921   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3922   ierr = MatDestroy(&B);CHKERRQ(ierr);
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3927 #undef __FUNCT__
3928 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3929 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3930 {
3931   PetscErrorCode      ierr;
3932   Mat_Merge_SeqsToMPI *merge;
3933   PetscContainer      container;
3934 
3935   PetscFunctionBegin;
3936   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3937   if (container) {
3938     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3939     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3940     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3941     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3942     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3943     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3944     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3945     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3946     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3947     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3948     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3949     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3950     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3951     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3952     ierr = PetscFree(merge);CHKERRQ(ierr);
3953     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3954   }
3955   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3956   PetscFunctionReturn(0);
3957 }
3958 
3959 #include <../src/mat/utils/freespace.h>
3960 #include <petscbt.h>
3961 
3962 #undef __FUNCT__
3963 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3964 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3965 {
3966   PetscErrorCode      ierr;
3967   MPI_Comm            comm;
3968   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3969   PetscMPIInt         size,rank,taga,*len_s;
3970   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3971   PetscInt            proc,m;
3972   PetscInt            **buf_ri,**buf_rj;
3973   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3974   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3975   MPI_Request         *s_waits,*r_waits;
3976   MPI_Status          *status;
3977   MatScalar           *aa=a->a;
3978   MatScalar           **abuf_r,*ba_i;
3979   Mat_Merge_SeqsToMPI *merge;
3980   PetscContainer      container;
3981 
3982   PetscFunctionBegin;
3983   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3984   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3985 
3986   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3987   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3988 
3989   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3990   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3991 
3992   bi     = merge->bi;
3993   bj     = merge->bj;
3994   buf_ri = merge->buf_ri;
3995   buf_rj = merge->buf_rj;
3996 
3997   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3998   owners = merge->rowmap->range;
3999   len_s  = merge->len_s;
4000 
4001   /* send and recv matrix values */
4002   /*-----------------------------*/
4003   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4004   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4005 
4006   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4007   for (proc=0,k=0; proc<size; proc++) {
4008     if (!len_s[proc]) continue;
4009     i    = owners[proc];
4010     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4011     k++;
4012   }
4013 
4014   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4015   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4016   ierr = PetscFree(status);CHKERRQ(ierr);
4017 
4018   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4019   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4020 
4021   /* insert mat values of mpimat */
4022   /*----------------------------*/
4023   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4024   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4025 
4026   for (k=0; k<merge->nrecv; k++) {
4027     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4028     nrows       = *(buf_ri_k[k]);
4029     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4030     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4031   }
4032 
4033   /* set values of ba */
4034   m = merge->rowmap->n;
4035   for (i=0; i<m; i++) {
4036     arow = owners[rank] + i;
4037     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4038     bnzi = bi[i+1] - bi[i];
4039     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4040 
4041     /* add local non-zero vals of this proc's seqmat into ba */
4042     anzi   = ai[arow+1] - ai[arow];
4043     aj     = a->j + ai[arow];
4044     aa     = a->a + ai[arow];
4045     nextaj = 0;
4046     for (j=0; nextaj<anzi; j++) {
4047       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4048         ba_i[j] += aa[nextaj++];
4049       }
4050     }
4051 
4052     /* add received vals into ba */
4053     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4054       /* i-th row */
4055       if (i == *nextrow[k]) {
4056         anzi   = *(nextai[k]+1) - *nextai[k];
4057         aj     = buf_rj[k] + *(nextai[k]);
4058         aa     = abuf_r[k] + *(nextai[k]);
4059         nextaj = 0;
4060         for (j=0; nextaj<anzi; j++) {
4061           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4062             ba_i[j] += aa[nextaj++];
4063           }
4064         }
4065         nextrow[k]++; nextai[k]++;
4066       }
4067     }
4068     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4069   }
4070   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4071   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4072 
4073   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4074   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4075   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4076   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4077   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4078   PetscFunctionReturn(0);
4079 }
4080 
4081 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4082 
4083 #undef __FUNCT__
4084 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4085 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4086 {
4087   PetscErrorCode      ierr;
4088   Mat                 B_mpi;
4089   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4090   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4091   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4092   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4093   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4094   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4095   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4096   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4097   MPI_Status          *status;
4098   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4099   PetscBT             lnkbt;
4100   Mat_Merge_SeqsToMPI *merge;
4101   PetscContainer      container;
4102 
4103   PetscFunctionBegin;
4104   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4105 
4106   /* make sure it is a PETSc comm */
4107   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4110 
4111   ierr = PetscNew(&merge);CHKERRQ(ierr);
4112   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4113 
4114   /* determine row ownership */
4115   /*---------------------------------------------------------*/
4116   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4117   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4118   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4119   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4120   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4121   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4122   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4123 
4124   m      = merge->rowmap->n;
4125   owners = merge->rowmap->range;
4126 
4127   /* determine the number of messages to send, their lengths */
4128   /*---------------------------------------------------------*/
4129   len_s = merge->len_s;
4130 
4131   len          = 0; /* length of buf_si[] */
4132   merge->nsend = 0;
4133   for (proc=0; proc<size; proc++) {
4134     len_si[proc] = 0;
4135     if (proc == rank) {
4136       len_s[proc] = 0;
4137     } else {
4138       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4139       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4140     }
4141     if (len_s[proc]) {
4142       merge->nsend++;
4143       nrows = 0;
4144       for (i=owners[proc]; i<owners[proc+1]; i++) {
4145         if (ai[i+1] > ai[i]) nrows++;
4146       }
4147       len_si[proc] = 2*(nrows+1);
4148       len         += len_si[proc];
4149     }
4150   }
4151 
4152   /* determine the number and length of messages to receive for ij-structure */
4153   /*-------------------------------------------------------------------------*/
4154   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4155   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4156 
4157   /* post the Irecv of j-structure */
4158   /*-------------------------------*/
4159   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4160   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4161 
4162   /* post the Isend of j-structure */
4163   /*--------------------------------*/
4164   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4165 
4166   for (proc=0, k=0; proc<size; proc++) {
4167     if (!len_s[proc]) continue;
4168     i    = owners[proc];
4169     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4170     k++;
4171   }
4172 
4173   /* receives and sends of j-structure are complete */
4174   /*------------------------------------------------*/
4175   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4176   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4177 
4178   /* send and recv i-structure */
4179   /*---------------------------*/
4180   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4181   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4182 
4183   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4184   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4185   for (proc=0,k=0; proc<size; proc++) {
4186     if (!len_s[proc]) continue;
4187     /* form outgoing message for i-structure:
4188          buf_si[0]:                 nrows to be sent
4189                [1:nrows]:           row index (local to the receiving process)
4190                [nrows+1:2*nrows+1]: i-structure index
4191     */
4192     /*-------------------------------------------*/
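    /* hypothetical illustration: sending rows 3 and 5 (receiver-local indices) holding 2 and 4
       nonzeros respectively gives buf_si = {2, 3, 5, 0, 2, 6}, i.e. len_si[proc] = 6 entries */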
4193     nrows       = len_si[proc]/2 - 1;
4194     buf_si_i    = buf_si + nrows+1;
4195     buf_si[0]   = nrows;
4196     buf_si_i[0] = 0;
4197     nrows       = 0;
4198     for (i=owners[proc]; i<owners[proc+1]; i++) {
4199       anzi = ai[i+1] - ai[i];
4200       if (anzi) {
4201         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4202         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4203         nrows++;
4204       }
4205     }
4206     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4207     k++;
4208     buf_si += len_si[proc];
4209   }
4210 
4211   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4212   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4213 
4214   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4215   for (i=0; i<merge->nrecv; i++) {
4216     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4217   }
4218 
4219   ierr = PetscFree(len_si);CHKERRQ(ierr);
4220   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4221   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4222   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4223   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4224   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4225   ierr = PetscFree(status);CHKERRQ(ierr);
4226 
4227   /* compute a local seq matrix in each processor */
4228   /*----------------------------------------------*/
4229   /* allocate bi array and free space for accumulating nonzero column info */
4230   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4231   bi[0] = 0;
4232 
4233   /* create and initialize a linked list */
4234   nlnk = N+1;
4235   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4236 
4237   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4238   len  = ai[owners[rank+1]] - ai[owners[rank]];
4239   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4240 
4241   current_space = free_space;
4242 
4243   /* determine symbolic info for each local row */
4244   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4245 
4246   for (k=0; k<merge->nrecv; k++) {
4247     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4248     nrows       = *buf_ri_k[k];
4249     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4250     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4251   }
4252 
4253   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4254   len  = 0;
4255   for (i=0; i<m; i++) {
4256     bnzi = 0;
4257     /* add local non-zero cols of this proc's seqmat into lnk */
4258     arow  = owners[rank] + i;
4259     anzi  = ai[arow+1] - ai[arow];
4260     aj    = a->j + ai[arow];
4261     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4262     bnzi += nlnk;
4263     /* add received col data into lnk */
4264     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4265       if (i == *nextrow[k]) { /* i-th row */
4266         anzi  = *(nextai[k]+1) - *nextai[k];
4267         aj    = buf_rj[k] + *nextai[k];
4268         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4269         bnzi += nlnk;
4270         nextrow[k]++; nextai[k]++;
4271       }
4272     }
4273     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4274 
4275     /* if free space is not available, make more free space */
4276     if (current_space->local_remaining<bnzi) {
4277       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4278       nspacedouble++;
4279     }
4280     /* copy data into free space, then initialize lnk */
4281     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4282     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4283 
4284     current_space->array           += bnzi;
4285     current_space->local_used      += bnzi;
4286     current_space->local_remaining -= bnzi;
4287 
4288     bi[i+1] = bi[i] + bnzi;
4289   }
4290 
4291   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4292 
4293   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4294   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4295   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4296 
4297   /* create symbolic parallel matrix B_mpi */
4298   /*---------------------------------------*/
4299   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4300   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4301   if (n==PETSC_DECIDE) {
4302     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4303   } else {
4304     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4305   }
4306   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4307   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4308   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4309   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4310   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4311 
4312   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4313   B_mpi->assembled    = PETSC_FALSE;
4314   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4315   merge->bi           = bi;
4316   merge->bj           = bj;
4317   merge->buf_ri       = buf_ri;
4318   merge->buf_rj       = buf_rj;
4319   merge->coi          = NULL;
4320   merge->coj          = NULL;
4321   merge->owners_co    = NULL;
4322 
4323   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4324 
4325   /* attach the supporting struct to B_mpi for reuse */
4326   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4327   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4328   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4329   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4330   *mpimat = B_mpi;
4331 
4332   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4333   PetscFunctionReturn(0);
4334 }
4335 
4336 #undef __FUNCT__
4337 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4338 /*@C
4339       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4340                  matrices from each processor
4341 
4342     Collective on MPI_Comm
4343 
4344    Input Parameters:
4345 +    comm - the communicator the parallel matrix will live on
4346 .    seqmat - the input sequential matrix on each processor
4347 .    m - number of local rows (or PETSC_DECIDE)
4348 .    n - number of local columns (or PETSC_DECIDE)
4349 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4350 
4351    Output Parameter:
4352 .    mpimat - the parallel matrix generated
4353 
4354     Level: advanced
4355 
4356    Notes:
4357      The sequential matrix MUST have the same dimensions on every processor.
4358      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4359      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
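
     Example (a minimal usage sketch; Aloc and Aglobal are illustrative names; each process is assumed
     to have assembled its own MATSEQAIJ contribution Aloc, all of the same dimensions):
.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglobal);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Aglobal);CHKERRQ(ierr);
.ve
     The second call reuses the symbolic pattern and only redoes the numerical assembly, which is useful
     when the values of Aloc change but its nonzero structure does not.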
4360 @*/
4361 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4362 {
4363   PetscErrorCode ierr;
4364   PetscMPIInt    size;
4365 
4366   PetscFunctionBegin;
4367   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4368   if (size == 1) {
4369     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4370     if (scall == MAT_INITIAL_MATRIX) {
4371       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4372     } else {
4373       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4374     }
4375     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4376     PetscFunctionReturn(0);
4377   }
4378   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4379   if (scall == MAT_INITIAL_MATRIX) {
4380     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4381   }
4382   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4383   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4384   PetscFunctionReturn(0);
4385 }
4386 
4387 #undef __FUNCT__
4388 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4389 /*@
4390      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4391           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4392           with MatGetSize()
4393 
4394     Not Collective
4395 
4396    Input Parameters:
4397 +    A - the matrix
4398 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4399 
4400    Output Parameter:
4401 .    A_loc - the local sequential matrix generated
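
   Example (a minimal usage sketch; Aloc is an illustrative name; the MAT_REUSE_MATRIX call refreshes the
   values of Aloc after the entries of A have changed):
.vb
   Mat Aloc;
   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
   ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
   ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve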
4402 
4403     Level: developer
4404 
4405 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4406 
4407 @*/
4408 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4409 {
4410   PetscErrorCode ierr;
4411   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4412   Mat_SeqAIJ     *mat,*a,*b;
4413   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4414   MatScalar      *aa,*ba,*cam;
4415   PetscScalar    *ca;
4416   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4417   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4418   PetscBool      match;
4419   MPI_Comm       comm;
4420   PetscMPIInt    size;
4421 
4422   PetscFunctionBegin;
4423   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4424   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4425   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4426   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4427   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4428 
4429   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4430   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4431   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4432   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4433   aa = a->a; ba = b->a;
4434   if (scall == MAT_INITIAL_MATRIX) {
4435     if (size == 1) {
4436       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4437       PetscFunctionReturn(0);
4438     }
4439 
4440     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4441     ci[0] = 0;
4442     for (i=0; i<am; i++) {
4443       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4444     }
4445     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4446     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4447     k    = 0;
4448     for (i=0; i<am; i++) {
4449       ncols_o = bi[i+1] - bi[i];
4450       ncols_d = ai[i+1] - ai[i];
4451       /* off-diagonal portion of A */
4452       for (jo=0; jo<ncols_o; jo++) {
4453         col = cmap[*bj];
4454         if (col >= cstart) break;
4455         cj[k]   = col; bj++;
4456         ca[k++] = *ba++;
4457       }
4458       /* diagonal portion of A */
4459       for (j=0; j<ncols_d; j++) {
4460         cj[k]   = cstart + *aj++;
4461         ca[k++] = *aa++;
4462       }
4463       /* off-diagonal portion of A */
4464       for (j=jo; j<ncols_o; j++) {
4465         cj[k]   = cmap[*bj++];
4466         ca[k++] = *ba++;
4467       }
4468     }
4469     /* put together the new matrix */
4470     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4471     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4472     /* Since these are PETSc arrays, change flags to free them as necessary. */
4473     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4474     mat->free_a  = PETSC_TRUE;
4475     mat->free_ij = PETSC_TRUE;
4476     mat->nonew   = 0;
4477   } else if (scall == MAT_REUSE_MATRIX) {
4478     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4479     ci = mat->i; cj = mat->j; cam = mat->a;
4480     for (i=0; i<am; i++) {
4481       /* off-diagonal portion of A */
4482       ncols_o = bi[i+1] - bi[i];
4483       for (jo=0; jo<ncols_o; jo++) {
4484         col = cmap[*bj];
4485         if (col >= cstart) break;
4486         *cam++ = *ba++; bj++;
4487       }
4488       /* diagonal portion of A */
4489       ncols_d = ai[i+1] - ai[i];
4490       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4491       /* off-diagonal portion of A */
4492       for (j=jo; j<ncols_o; j++) {
4493         *cam++ = *ba++; bj++;
4494       }
4495     }
4496   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4497   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4498   PetscFunctionReturn(0);
4499 }
4500 
4501 #undef __FUNCT__
4502 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4503 /*@C
4504      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4505 
4506     Not Collective
4507 
4508    Input Parameters:
4509 +    A - the matrix
4510 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4511 -    row, col - index sets of rows and columns to extract (or NULL)
4512 
4513    Output Parameter:
4514 .    A_loc - the local sequential matrix generated
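
   Example (a minimal usage sketch; Aloc is an illustrative name; passing NULL for row and col selects the
   default row and column sets described above):
.vb
   Mat Aloc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
.ve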
4515 
4516     Level: developer
4517 
4518 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4519 
4520 @*/
4521 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4522 {
4523   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4524   PetscErrorCode ierr;
4525   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4526   IS             isrowa,iscola;
4527   Mat            *aloc;
4528   PetscBool      match;
4529 
4530   PetscFunctionBegin;
4531   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4532   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4533   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4534   if (!row) {
4535     start = A->rmap->rstart; end = A->rmap->rend;
4536     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4537   } else {
4538     isrowa = *row;
4539   }
4540   if (!col) {
4541     start = A->cmap->rstart;
4542     cmap  = a->garray;
4543     nzA   = a->A->cmap->n;
4544     nzB   = a->B->cmap->n;
4545     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4546     ncols = 0;
4547     for (i=0; i<nzB; i++) {
4548       if (cmap[i] < start) idx[ncols++] = cmap[i];
4549       else break;
4550     }
4551     imark = i;
4552     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4553     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4554     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4555   } else {
4556     iscola = *col;
4557   }
4558   if (scall != MAT_INITIAL_MATRIX) {
4559     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4560     aloc[0] = *A_loc;
4561   }
4562   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4563   *A_loc = aloc[0];
4564   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4565   if (!row) {
4566     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4567   }
4568   if (!col) {
4569     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4570   }
4571   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 #undef __FUNCT__
4576 #define __FUNCT__ "MatGetBrowsOfAcols"
4577 /*@C
4578     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4579 
4580     Collective on Mat
4581 
4582    Input Parameters:
4583 +    A,B - the matrices in mpiaij format
4584 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4585 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4586 
4587    Output Parameter:
4588 +    rowb, colb - index sets of rows and columns of B to extract
4589 -    B_seq - the sequential matrix generated
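
   Example (a minimal usage sketch; rowb, colb and Bseq are illustrative names; the index sets created by
   the first call are reused by the second):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat Bseq = NULL;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve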
4590 
4591     Level: developer
4592 
4593 @*/
4594 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4595 {
4596   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4597   PetscErrorCode ierr;
4598   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4599   IS             isrowb,iscolb;
4600   Mat            *bseq=NULL;
4601 
4602   PetscFunctionBegin;
4603   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4604     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4605   }
4606   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4607 
4608   if (scall == MAT_INITIAL_MATRIX) {
4609     start = A->cmap->rstart;
4610     cmap  = a->garray;
4611     nzA   = a->A->cmap->n;
4612     nzB   = a->B->cmap->n;
4613     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4614     ncols = 0;
4615     for (i=0; i<nzB; i++) {  /* row < local row index */
4616       if (cmap[i] < start) idx[ncols++] = cmap[i];
4617       else break;
4618     }
4619     imark = i;
4620     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4621     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4622     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4623     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4624   } else {
4625     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4626     isrowb  = *rowb; iscolb = *colb;
4627     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4628     bseq[0] = *B_seq;
4629   }
4630   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4631   *B_seq = bseq[0];
4632   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4633   if (!rowb) {
4634     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4635   } else {
4636     *rowb = isrowb;
4637   }
4638   if (!colb) {
4639     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4640   } else {
4641     *colb = iscolb;
4642   }
4643   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4644   PetscFunctionReturn(0);
4645 }
4646 
4647 #undef __FUNCT__
4648 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4649 /*
4650     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4651     of the OFF-DIAGONAL portion of local A
4652 
4653     Collective on Mat
4654 
4655    Input Parameters:
4656 +    A,B - the matrices in mpiaij format
4657 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4658 
4659    Output Parameter:
4660 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4661 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4662 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4663 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4664 
4665     Level: developer
4666 
4667 */
4668 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4669 {
4670   VecScatter_MPI_General *gen_to,*gen_from;
4671   PetscErrorCode         ierr;
4672   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4673   Mat_SeqAIJ             *b_oth;
4674   VecScatter             ctx =a->Mvctx;
4675   MPI_Comm               comm;
4676   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4677   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4678   PetscScalar            *rvalues,*svalues;
4679   MatScalar              *b_otha,*bufa,*bufA;
4680   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4681   MPI_Request            *rwaits = NULL,*swaits = NULL;
4682   MPI_Status             *sstatus,rstatus;
4683   PetscMPIInt            jj,size;
4684   PetscInt               *cols,sbs,rbs;
4685   PetscScalar            *vals;
4686 
4687   PetscFunctionBegin;
4688   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4689   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4690   if (size == 1) PetscFunctionReturn(0);
4691 
4692   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4693     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4694   }
4695   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4696   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4697 
4698   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4699   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4700   rvalues  = gen_from->values; /* holds the length of receiving row */
4701   svalues  = gen_to->values;   /* holds the length of sending row */
4702   nrecvs   = gen_from->n;
4703   nsends   = gen_to->n;
4704 
4705   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4706   srow    = gen_to->indices;    /* local row index to be sent */
4707   sstarts = gen_to->starts;
4708   sprocs  = gen_to->procs;
4709   sstatus = gen_to->sstatus;
4710   sbs     = gen_to->bs;
4711   rstarts = gen_from->starts;
4712   rprocs  = gen_from->procs;
4713   rbs     = gen_from->bs;
4714 
4715   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4716   if (scall == MAT_INITIAL_MATRIX) {
4717     /* i-array */
4718     /*---------*/
4719     /*  post receives */
4720     for (i=0; i<nrecvs; i++) {
4721       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4722       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4723       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4724     }
4725 
4726     /* pack the outgoing message */
4727     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4728 
4729     sstartsj[0] = 0;
4730     rstartsj[0] = 0;
4731     len         = 0; /* total length of j or a array to be sent */
4732     k           = 0;
4733     for (i=0; i<nsends; i++) {
4734       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4735       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4736       for (j=0; j<nrows; j++) {
4737         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4738         for (l=0; l<sbs; l++) {
4739           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4740 
4741           rowlen[j*sbs+l] = ncols;
4742 
4743           len += ncols;
4744           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4745         }
4746         k++;
4747       }
4748       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4749 
4750       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4751     }
4752     /* recvs and sends of i-array are completed */
4753     i = nrecvs;
4754     while (i--) {
4755       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4756     }
4757     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4758 
4759     /* allocate buffers for sending j and a arrays */
4760     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4761     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4762 
4763     /* create i-array of B_oth */
4764     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4765 
4766     b_othi[0] = 0;
4767     len       = 0; /* total length of j or a array to be received */
4768     k         = 0;
4769     for (i=0; i<nrecvs; i++) {
4770       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4771       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4772       for (j=0; j<nrows; j++) {
4773         b_othi[k+1] = b_othi[k] + rowlen[j];
4774         len        += rowlen[j]; k++;
4775       }
4776       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4777     }
4778 
4779     /* allocate space for j and a arrays of B_oth */
4780     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4781     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4782 
4783     /* j-array */
4784     /*---------*/
4785     /*  post receives of j-array */
4786     for (i=0; i<nrecvs; i++) {
4787       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4788       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4789     }
4790 
4791     /* pack the outgoing message j-array */
4792     k = 0;
4793     for (i=0; i<nsends; i++) {
4794       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4795       bufJ  = bufj+sstartsj[i];
4796       for (j=0; j<nrows; j++) {
4797         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4798         for (ll=0; ll<sbs; ll++) {
4799           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4800           for (l=0; l<ncols; l++) {
4801             *bufJ++ = cols[l];
4802           }
4803           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4804         }
4805       }
4806       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4807     }
4808 
4809     /* recvs and sends of j-array are completed */
4810     i = nrecvs;
4811     while (i--) {
4812       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4813     }
4814     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4815   } else if (scall == MAT_REUSE_MATRIX) {
4816     sstartsj = *startsj_s;
4817     rstartsj = *startsj_r;
4818     bufa     = *bufa_ptr;
4819     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4820     b_otha   = b_oth->a;
4821   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4822 
4823   /* a-array */
4824   /*---------*/
4825   /*  post receives of a-array */
4826   for (i=0; i<nrecvs; i++) {
4827     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4828     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4829   }
4830 
4831   /* pack the outgoing message a-array */
4832   k = 0;
4833   for (i=0; i<nsends; i++) {
4834     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4835     bufA  = bufa+sstartsj[i];
4836     for (j=0; j<nrows; j++) {
4837       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4838       for (ll=0; ll<sbs; ll++) {
4839         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4840         for (l=0; l<ncols; l++) {
4841           *bufA++ = vals[l];
4842         }
4843         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4844       }
4845     }
4846     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4847   }
4848   /* recvs and sends of a-array are completed */
4849   i = nrecvs;
4850   while (i--) {
4851     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4852   }
4853   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4854   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4855 
4856   if (scall == MAT_INITIAL_MATRIX) {
4857     /* put together the new matrix */
4858     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4859 
4860     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4861     /* Since these are PETSc arrays, change flags to free them as necessary. */
4862     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4863     b_oth->free_a  = PETSC_TRUE;
4864     b_oth->free_ij = PETSC_TRUE;
4865     b_oth->nonew   = 0;
4866 
4867     ierr = PetscFree(bufj);CHKERRQ(ierr);
4868     if (!startsj_s || !bufa_ptr) {
4869       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4870       ierr = PetscFree(bufa);CHKERRQ(ierr);
4871     } else {
4872       *startsj_s = sstartsj;
4873       *startsj_r = rstartsj;
4874       *bufa_ptr  = bufa;
4875     }
4876   }
4877   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4878   PetscFunctionReturn(0);
4879 }
4880 
4881 #undef __FUNCT__
4882 #define __FUNCT__ "MatGetCommunicationStructs"
4883 /*@C
4884   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4885 
4886   Not Collective
4887 
4888   Input Parameters:
4889 . A - The matrix in mpiaij format
4890 
4891   Output Parameter:
4892 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4893 . colmap - A map from global column index to local index into lvec
4894 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4895 
4896   Level: developer
4897 
4898 @*/
4899 #if defined(PETSC_USE_CTABLE)
4900 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4901 #else
4902 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4903 #endif
4904 {
4905   Mat_MPIAIJ *a;
4906 
4907   PetscFunctionBegin;
4908   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4909   PetscValidPointer(lvec, 2);
4910   PetscValidPointer(colmap, 3);
4911   PetscValidPointer(multScatter, 4);
4912   a = (Mat_MPIAIJ*) A->data;
4913   if (lvec) *lvec = a->lvec;
4914   if (colmap) *colmap = a->colmap;
4915   if (multScatter) *multScatter = a->Mvctx;
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4920 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4921 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4922 #if defined(PETSC_HAVE_ELEMENTAL)
4923 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4924 #endif
4925 
4926 #undef __FUNCT__
4927 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4928 /*
4929     Computes (B'*A')' since computing B*A directly is untenable
4930 
4931                n                       p                          p
4932         (              )       (              )         (                  )
4933       m (      A       )  *  n (       B      )   =   m (         C        )
4934         (              )       (              )         (                  )
4935 
4936 */
4937 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4938 {
4939   PetscErrorCode ierr;
4940   Mat            At,Bt,Ct;
4941 
4942   PetscFunctionBegin;
4943   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4944   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4945   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4946   ierr = MatDestroy(&At);CHKERRQ(ierr);
4947   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4948   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4949   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4950   PetscFunctionReturn(0);
4951 }
4952 
4953 #undef __FUNCT__
4954 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4955 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4956 {
4957   PetscErrorCode ierr;
4958   PetscInt       m=A->rmap->n,n=B->cmap->n;
4959   Mat            Cmat;
4960 
4961   PetscFunctionBegin;
4962   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4963   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4964   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4965   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4966   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4967   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4968   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4969   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4970 
4971   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4972 
4973   *C = Cmat;
4974   PetscFunctionReturn(0);
4975 }
4976 
4977 /* ----------------------------------------------------------------*/
4978 #undef __FUNCT__
4979 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4980 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4981 {
4982   PetscErrorCode ierr;
4983 
4984   PetscFunctionBegin;
4985   if (scall == MAT_INITIAL_MATRIX) {
4986     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4987     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4988     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4989   }
4990   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4991   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4992   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4993   PetscFunctionReturn(0);
4994 }
4995 
4996 /*MC
4997    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4998 
4999    Options Database Keys:
5000 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
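
   A matrix of this type is typically created with MatCreateAIJ(); an equivalent sketch using the
   lower-level calls (A, M, N and the preallocation arguments are illustrative) is
.vb
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve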
5001 
5002   Level: beginner
5003 
5004 .seealso: MatCreateAIJ()
5005 M*/
5006 
5007 #undef __FUNCT__
5008 #define __FUNCT__ "MatCreate_MPIAIJ"
5009 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5010 {
5011   Mat_MPIAIJ     *b;
5012   PetscErrorCode ierr;
5013   PetscMPIInt    size;
5014 
5015   PetscFunctionBegin;
5016   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5017 
5018   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5019   B->data       = (void*)b;
5020   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5021   B->assembled  = PETSC_FALSE;
5022   B->insertmode = NOT_SET_VALUES;
5023   b->size       = size;
5024 
5025   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5026 
5027   /* build cache for off array entries formed */
5028   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5029 
5030   b->donotstash  = PETSC_FALSE;
5031   b->colmap      = 0;
5032   b->garray      = 0;
5033   b->roworiented = PETSC_TRUE;
5034 
5035   /* stuff used for matrix vector multiply */
5036   b->lvec  = NULL;
5037   b->Mvctx = NULL;
5038 
5039   /* stuff for MatGetRow() */
5040   b->rowindices   = 0;
5041   b->rowvalues    = 0;
5042   b->getrowactive = PETSC_FALSE;
5043 
5044   /* flexible pointer used in CUSP/CUSPARSE classes */
5045   b->spptr = NULL;
5046 
5047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5057 #if defined(PETSC_HAVE_ELEMENTAL)
5058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5059 #endif
5060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5063   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5064   PetscFunctionReturn(0);
5065 }
5066 
5067 #undef __FUNCT__
5068 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5069 /*@C
5070      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5071          and "off-diagonal" part of the matrix in CSR format.
5072 
5073    Collective on MPI_Comm
5074 
5075    Input Parameters:
5076 +  comm - MPI communicator
5077 .  m - number of local rows (Cannot be PETSC_DECIDE)
5078 .  n - This value should be the same as the local size used in creating the
5079        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5080        calculated if N is given). For square matrices n is almost always m.
5081 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5082 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5083 .   i - row indices for "diagonal" portion of matrix
5084 .   j - column indices
5085 .   a - matrix values
5086 .   oi - row indices for "off-diagonal" portion of matrix
5087 .   oj - column indices
5088 -   oa - matrix values
5089 
5090    Output Parameter:
5091 .   mat - the matrix
5092 
5093    Level: advanced
5094 
5095    Notes:
5096        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5097        must free the arrays once the matrix has been destroyed and not before.
5098 
5099        The i and j indices are 0 based
5100 
5101        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5102 
5103        This sets local rows and cannot be used to set off-processor values.
5104 
5105        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5106        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5107        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5108        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5109        keep track of the underlying arrays (see the sketch after this routine's body). Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE)
5110        to disable all communication if it is known that only local entries will be set.
5111 
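       Example usage (an illustrative sketch, not taken from the PETSc examples): assume two MPI ranks and a 4 x 4
       matrix with two rows and two columns owned by each rank; the arrays below are those passed by rank 0 and
       encode the entries (0,0)=1, (1,1)=2 in the "diagonal" block and (0,3)=3, (1,2)=4 in the "off-diagonal" block.
       Note that j holds local column indices while oj holds global column indices, and that in real code the arrays
       must remain valid until the matrix is destroyed.
.vb
       Mat            A;
       PetscErrorCode ierr;
       PetscInt       i[]  = {0,1,2},  j[]  = {0,1};
       PetscScalar    a[]  = {1.0,2.0};
       PetscInt       oi[] = {0,1,2},  oj[] = {3,2};
       PetscScalar    oa[] = {3.0,4.0};

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
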
5112 .keywords: matrix, aij, compressed row, sparse, parallel
5113 
5114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5115           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5116 @*/
5117 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5118 {
5119   PetscErrorCode ierr;
5120   Mat_MPIAIJ     *maij;
5121 
5122   PetscFunctionBegin;
5123   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5124   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5125   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5126   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5127   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5128   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5129   maij = (Mat_MPIAIJ*) (*mat)->data;
5130 
5131   (*mat)->preallocated = PETSC_TRUE;
5132 
5133   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5134   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5135 
5136   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5137   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5138 
5139   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5140   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5141   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5142   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5143 
5144   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5145   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5146   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5147   PetscFunctionReturn(0);
5148 }
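
/*
   The manual page above recommends assembling with MatSetValues() instead of MatCreateMPIAIJWithSplitArrays().
   A minimal sketch of that recommended approach follows (illustrative only: the sizes, the preallocation counts,
   and the single entry set below are hypothetical and not part of the PETSc API documentation):

      Mat            A;
      PetscInt       row = 0, col = 3;        <- global indices; any rank may set any entry
      PetscScalar    val = 3.0;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,4,4);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,1,NULL,1,NULL);CHKERRQ(ierr);       <- used when run on multiple ranks
      ierr = MatSeqAIJSetPreallocation(A,2,NULL);CHKERRQ(ierr);              <- used when run on a single rank
      ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);           <- off-process entries are communicated during assembly
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/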
5149 
5150 /*
5151     Special version of MatSetValues() for MPIAIJ matrices that can be called directly from Fortran
5152 */
5153 #include <petsc-private/fortranimpl.h>
5154 
5155 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5156 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5157 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5158 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5159 #endif
5160 
5161 /* Change these macros so they can be used in this void function: no error code can be returned, so errors abort via CHKERRABORT() */
5162 #undef CHKERRQ
5163 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5164 #undef SETERRQ2
5165 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5166 #undef SETERRQ3
5167 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5168 #undef SETERRQ
5169 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5170 
5171 #undef __FUNCT__
5172 #define __FUNCT__ "matsetvaluesmpiaij_"
5173 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5174 {
5175   Mat            mat  = *mmat;
5176   PetscInt       m    = *mm, n = *mn;
5177   InsertMode     addv = *maddv;
5178   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5179   PetscScalar    value;
5180   PetscErrorCode ierr;
5181 
5182   MatCheckPreallocated(mat,1);
5183   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5184 
5185 #if defined(PETSC_USE_DEBUG)
5186   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5187 #endif
5188   {
5189     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5190     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5191     PetscBool roworiented = aij->roworiented;
5192 
5193     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5194     Mat        A                 = aij->A;
5195     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5196     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5197     MatScalar  *aa               = a->a;
5198     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5199     Mat        B                 = aij->B;
5200     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5201     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5202     MatScalar  *ba               = b->a;
5203 
5204     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5205     PetscInt  nonew = a->nonew;
5206     MatScalar *ap1,*ap2;
5207 
5208     PetscFunctionBegin;
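    /* Rows owned by this process are inserted directly into the local diagonal block (A) or off-diagonal block (B)
       below; rows owned by other processes are stashed and sent to their owners during assembly, unless donotstash
       is set */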
5209     for (i=0; i<m; i++) {
5210       if (im[i] < 0) continue;
5211 #if defined(PETSC_USE_DEBUG)
5212       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5213 #endif
5214       if (im[i] >= rstart && im[i] < rend) {
5215         row      = im[i] - rstart;
5216         lastcol1 = -1;
5217         rp1      = aj + ai[row];
5218         ap1      = aa + ai[row];
5219         rmax1    = aimax[row];
5220         nrow1    = ailen[row];
5221         low1     = 0;
5222         high1    = nrow1;
5223         lastcol2 = -1;
5224         rp2      = bj + bi[row];
5225         ap2      = ba + bi[row];
5226         rmax2    = bimax[row];
5227         nrow2    = bilen[row];
5228         low2     = 0;
5229         high2    = nrow2;
5230 
5231         for (j=0; j<n; j++) {
5232           if (roworiented) value = v[i*n+j];
5233           else value = v[i+j*m];
5234           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5235           if (in[j] >= cstart && in[j] < cend) {
5236             col = in[j] - cstart;
5237             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5238           } else if (in[j] < 0) continue;
5239 #if defined(PETSC_USE_DEBUG)
5240           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5241 #endif
5242           else {
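            /* The column falls outside the local diagonal block, so the entry goes into the off-diagonal block B.
               Once the matrix has been assembled, B uses a compacted local column numbering, so the global column is
               translated through aij->colmap; a missing entry (col < 0) with new nonzero insertions allowed triggers
               MatDisAssemble_MPIAIJ(), which returns B to global column numbering before the value is inserted */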
5243             if (mat->was_assembled) {
5244               if (!aij->colmap) {
5245                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5246               }
5247 #if defined(PETSC_USE_CTABLE)
5248               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5249               col--;
5250 #else
5251               col = aij->colmap[in[j]] - 1;
5252 #endif
5253               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5254                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5255                 col  =  in[j];
5256                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5257                 B     = aij->B;
5258                 b     = (Mat_SeqAIJ*)B->data;
5259                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5260                 rp2   = bj + bi[row];
5261                 ap2   = ba + bi[row];
5262                 rmax2 = bimax[row];
5263                 nrow2 = bilen[row];
5264                 low2  = 0;
5265                 high2 = nrow2;
5266                 bm    = aij->B->rmap->n;
5267                 ba    = b->a;
5268               }
5269             } else col = in[j];
5270             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5271           }
5272         }
5273       } else if (!aij->donotstash) {
5274         if (roworiented) {
5275           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5276         } else {
5277           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5278         }
5279       }
5280     }
5281   }
5282   PetscFunctionReturnVoid();
5283 }
5284 
5285