xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 101da5f51f3cf6340ec2fbcb4e4bcb2c811922e6)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
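   Example Usage (a minimal sketch; the sizes and preallocation counts below are illustrative and
   error checking is omitted):
.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);          /* used on a single process communicator   */
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   /* used on a multiple process communicator */
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ... */
     MatDestroy(&A);
.ve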
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
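   Example (a sketch of selecting this type from the options database; assumes the program is run
   with -mat_type aijcrl):
.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetFromOptions(A);
.ve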
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
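  /* Count the locally owned rows whose stored entries (in both the diagonal block A and the
     off-diagonal block B) are all absent or explicitly zero; if no process has such a row,
     *keptrows is left NULL, otherwise an IS of the remaining (nonzero) rows is returned. */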
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
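  /* When the matrix is assembled and the row and column ownership ranges coincide, the whole
     diagonal lies in the local diagonal block aij->A and can be set there directly; otherwise
     fall back to the generic implementation. */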
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
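  /* Accumulate this process's contribution to every global column in a work array of global
     length n, then combine the contributions across processes with a sum (NORM_1, NORM_2)
     or max (NORM_INFINITY) reduction. */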
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
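
    Example call (illustrative only; m is the number of rows the calling process is to own and
    gmat is the sequential AIJ matrix whose numerical content is read on process 0):

      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);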
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal nonzeros in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal nonzeros in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 stores an order N integer array) but access is fast.
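  For example, if garray = {3, 7, 12} then a lookup of global column 7 in colmap yields 2, i.e. the
  local column index 1 in B plus one; a lookup that yields 0 (or fails) means the column does not
  occur in the off-diagonal part.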
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
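/*
   The two macros below insert (or, with ADD_VALUES, add) a single value at (row,col) into the
   diagonal block A (the suffix-1 variables) or the off-diagonal block B (the suffix-2 variables)
   of the local matrix.  Each performs a short binary search followed by a linear scan of the row,
   calls MatSeqXAIJReallocateAIJ() when a new nonzero must be created, and relies on the caller
   (MatSetValues_MPIAIJ()) to have loaded rp1/ap1/nrow1/low1/high1 (respectively rp2/...) for the
   current row.
*/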
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
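  /* v is assumed to hold every stored value of this global row ordered by increasing global column:
     the off-diagonal entries to the left of the diagonal block, the diagonal block, then the rest */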
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any process has disassembled; if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if the nonzero structure of submatrix B cannot change then we know that
738      no process disassembled, so we can skip this step
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *lrows;
787   PetscInt       r, len;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* get locally owned rows */
792   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
793   /* fix right hand side if needed */
794   if (x && b) {
795     const PetscScalar *xx;
796     PetscScalar       *bb;
797 
798     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
799     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
800     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
801     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
802     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
803   }
804   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
805   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
806   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
807     PetscBool cong;
808     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
809     if (cong) A->congruentlayouts = 1;
810     else      A->congruentlayouts = 0;
811   }
812   if ((diag != 0.0) && A->congruentlayouts) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
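  /* start the scatter of the ghost values, multiply by the diagonal block while the messages are in
     flight, then complete the scatter and apply the off-diagonal block to the gathered ghost values */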
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added into yy until the VecScatterEnd() on the next line */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_MPIAIJ"
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1154 #if defined(PETSC_HAVE_ELEMENTAL)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1156 #endif
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1162 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1163 {
1164   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1165   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1166   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1167   PetscErrorCode ierr;
1168   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1169   int            fd;
1170   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1171   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1172   PetscScalar    *column_values;
1173   PetscInt       message_count,flowcontrolcount;
1174   FILE           *file;
1175 
1176   PetscFunctionBegin;
1177   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1178   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1179   nz   = A->nz + B->nz;
1180   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1181   if (!rank) {
1182     header[0] = MAT_FILE_CLASSID;
1183     header[1] = mat->rmap->N;
1184     header[2] = mat->cmap->N;
1185 
1186     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1187     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     /* get largest number of rows any processor has */
1189     rlen  = mat->rmap->n;
1190     range = mat->rmap->range;
1191     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1192   } else {
1193     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1194     rlen = mat->rmap->n;
1195   }
1196 
1197   /* load up the local row counts */
1198   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1199   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1200 
1201   /* store the row lengths to the file */
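  /* process 0 writes its own data first, then receives and writes each other process's data one
     process at a time under viewer flow control (which limits the number of outstanding messages);
     the same pattern is repeated below for the column indices and the numerical values */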
1202   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1203   if (!rank) {
1204     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     for (i=1; i<size; i++) {
1206       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1207       rlen = range[i+1] - range[i];
1208       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1209       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1210     }
1211     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1212   } else {
1213     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1214     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1215     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1216   }
1217   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1218 
1219   /* load up the local column indices */
1220   nzmax = nz; /* the 0th process needs as much space as the largest amount any process needs */
1221   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1222   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1223   cnt   = 0;
1224   for (i=0; i<mat->rmap->n; i++) {
1225     for (j=B->i[i]; j<B->i[i+1]; j++) {
1226       if ((col = garray[B->j[j]]) > cstart) break;
1227       column_indices[cnt++] = col;
1228     }
1229     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1230     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1231   }
1232   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1233 
1234   /* store the column indices to the file */
1235   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1236   if (!rank) {
1237     MPI_Status status;
1238     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1239     for (i=1; i<size; i++) {
1240       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1241       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1242       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1243       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1245     }
1246     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1247   } else {
1248     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1249     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1254 
1255   /* load up the local column values */
1256   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1257   cnt  = 0;
1258   for (i=0; i<mat->rmap->n; i++) {
1259     for (j=B->i[i]; j<B->i[i+1]; j++) {
1260       if (garray[B->j[j]] > cstart) break;
1261       column_values[cnt++] = B->a[j];
1262     }
1263     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1264     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1265   }
1266   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1267 
1268   /* store the column values to the file */
1269   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1270   if (!rank) {
1271     MPI_Status status;
1272     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1273     for (i=1; i<size; i++) {
1274       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1275       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1276       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1277       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1278       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1279     }
1280     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1281   } else {
1282     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1283     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1284     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1286   }
1287   ierr = PetscFree(column_values);CHKERRQ(ierr);
1288 
1289   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1290   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #include <petscdraw.h>
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1297 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1298 {
1299   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1300   PetscErrorCode    ierr;
1301   PetscMPIInt       rank = aij->rank,size = aij->size;
1302   PetscBool         isdraw,iascii,isbinary;
1303   PetscViewer       sviewer;
1304   PetscViewerFormat format;
1305 
1306   PetscFunctionBegin;
1307   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1308   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1309   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1310   if (iascii) {
1311     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1312     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1313       MatInfo   info;
1314       PetscBool inodes;
1315 
1316       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1317       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1318       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1319       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1320       if (!inodes) {
1321         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1322                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1323       } else {
1324         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1325                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1326       }
1327       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1329       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1331       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1332       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1334       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1335       PetscFunctionReturn(0);
1336     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1337       PetscInt inodecount,inodelimit,*inodes;
1338       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1339       if (inodes) {
1340         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1341       } else {
1342         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1343       }
1344       PetscFunctionReturn(0);
1345     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1346       PetscFunctionReturn(0);
1347     }
1348   } else if (isbinary) {
1349     if (size == 1) {
1350       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1351       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1352     } else {
1353       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1354     }
1355     PetscFunctionReturn(0);
1356   } else if (isdraw) {
1357     PetscDraw draw;
1358     PetscBool isnull;
1359     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1360     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1361     if (isnull) PetscFunctionReturn(0);
1362   }
1363 
1364   {
1365     /* assemble the entire matrix onto first processor. */
1366     Mat        A;
1367     Mat_SeqAIJ *Aloc;
1368     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1369     MatScalar  *a;
1370 
1371     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1372     if (!rank) {
1373       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1374     } else {
1375       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1376     }
1377     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1378     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1379     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1380     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1381     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1382 
1383     /* copy over the A part */
1384     Aloc = (Mat_SeqAIJ*)aij->A->data;
1385     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1386     row  = mat->rmap->rstart;
1387     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1388     for (i=0; i<m; i++) {
1389       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1390       row++;
1391       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1392     }
1393     aj = Aloc->j;
1394     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1395 
1396     /* copy over the B part */
1397     Aloc = (Mat_SeqAIJ*)aij->B->data;
1398     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1399     row  = mat->rmap->rstart;
1400     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1401     ct   = cols;
1402     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1403     for (i=0; i<m; i++) {
1404       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1405       row++;
1406       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1407     }
1408     ierr = PetscFree(ct);CHKERRQ(ierr);
1409     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     /*
1412        Every process has to participate in the viewing since the graphics waits are
1413        synchronized across all processes that share the PetscDraw object
1414     */
1415     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1416     if (!rank) {
1417       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1418       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1419     }
1420     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1421     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1422     ierr = MatDestroy(&A);CHKERRQ(ierr);
1423   }
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatView_MPIAIJ"
1429 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1430 {
1431   PetscErrorCode ierr;
1432   PetscBool      iascii,isdraw,issocket,isbinary;
1433 
1434   PetscFunctionBegin;
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1438   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1439   if (iascii || isdraw || isbinary || issocket) {
1440     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1441   }
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatSOR_MPIAIJ"
1447 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1448 {
1449   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1450   PetscErrorCode ierr;
1451   Vec            bb1 = 0;
1452   PetscBool      hasop;
1453 
1454   PetscFunctionBegin;
1455   if (flag == SOR_APPLY_UPPER) {
1456     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1457     PetscFunctionReturn(0);
1458   }
1459 
1460   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1461     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1462   }
1463 
1464   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1465     if (flag & SOR_ZERO_INITIAL_GUESS) {
1466       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1467       its--;
1468     }
1469 
1470     while (its--) {
1471       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473 
1474       /* update rhs: bb1 = bb - B*x */
1475       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1476       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1477 
1478       /* local sweep */
1479       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1480     }
1481   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1482     if (flag & SOR_ZERO_INITIAL_GUESS) {
1483       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1484       its--;
1485     }
1486     while (its--) {
1487       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1488       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489 
1490       /* update rhs: bb1 = bb - B*x */
1491       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1492       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1493 
1494       /* local sweep */
1495       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1496     }
1497   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1498     if (flag & SOR_ZERO_INITIAL_GUESS) {
1499       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1500       its--;
1501     }
1502     while (its--) {
1503       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505 
1506       /* update rhs: bb1 = bb - B*x */
1507       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1508       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1509 
1510       /* local sweep */
1511       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1512     }
1513   } else if (flag & SOR_EISENSTAT) {
1514     Vec xx1;
1515 
1516     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1517     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1518 
1519     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521     if (!mat->diag) {
1522       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1523       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1524     }
1525     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1526     if (hasop) {
1527       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1528     } else {
1529       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1530     }
1531     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1532 
1533     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1534 
1535     /* local sweep */
1536     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1537     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1538     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1539   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1540 
1541   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1542 
1543   matin->errortype = mat->A->errortype;
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatPermute_MPIAIJ"
1549 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1550 {
1551   Mat            aA,aB,Aperm;
1552   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1553   PetscScalar    *aa,*ba;
1554   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1555   PetscSF        rowsf,sf;
1556   IS             parcolp = NULL;
1557   PetscBool      done;
1558   PetscErrorCode ierr;
1559 
1560   PetscFunctionBegin;
1561   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1562   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1563   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1564   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1565 
1566   /* Invert row permutation to find out where my rows should go */
1567   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1568   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1569   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1570   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1571   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1573 
1574   /* Invert column permutation to find out where my columns should go */
1575   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1576   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1577   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1578   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1579   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1580   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1581   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1582 
1583   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1584   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1585   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1586 
1587   /* Find out where my gcols should go */
1588   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1589   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1590   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1591   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1592   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1593   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1594   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1595   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1596 
1597   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1598   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1599   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1600   for (i=0; i<m; i++) {
1601     PetscInt row = rdest[i],rowner;
1602     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1603     for (j=ai[i]; j<ai[i+1]; j++) {
1604       PetscInt cowner,col = cdest[aj[j]];
1605       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1606       if (rowner == cowner) dnnz[i]++;
1607       else onnz[i]++;
1608     }
1609     for (j=bi[i]; j<bi[i+1]; j++) {
1610       PetscInt cowner,col = gcdest[bj[j]];
1611       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1612       if (rowner == cowner) dnnz[i]++;
1613       else onnz[i]++;
1614     }
1615   }
1616   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1617   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1618   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1619   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1620   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1621 
1622   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1623   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1624   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1625   for (i=0; i<m; i++) {
1626     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1627     PetscInt j0,rowlen;
1628     rowlen = ai[i+1] - ai[i];
1629     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches using the length-m scratch arrays */
1630       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1631       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1632     }
1633     rowlen = bi[i+1] - bi[i];
1634     for (j0=j=0; j<rowlen; j0=j) {
1635       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1636       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1637     }
1638   }
1639   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1640   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1642   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1643   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1644   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1645   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1646   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1647   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1648   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1649   *B = Aperm;
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1655 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1656 {
1657   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1658   PetscErrorCode ierr;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1662   if (ghosts) *ghosts = aij->garray;
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1668 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1669 {
1670   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1671   Mat            A    = mat->A,B = mat->B;
1672   PetscErrorCode ierr;
1673   PetscReal      isend[5],irecv[5];
1674 
1675   PetscFunctionBegin;
1676   info->block_size = 1.0;
1677   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1678 
1679   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1680   isend[3] = info->memory;  isend[4] = info->mallocs;
1681 
1682   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1683 
1684   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1685   isend[3] += info->memory;  isend[4] += info->mallocs;
1686   if (flag == MAT_LOCAL) {
1687     info->nz_used      = isend[0];
1688     info->nz_allocated = isend[1];
1689     info->nz_unneeded  = isend[2];
1690     info->memory       = isend[3];
1691     info->mallocs      = isend[4];
1692   } else if (flag == MAT_GLOBAL_MAX) {
1693     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1694 
1695     info->nz_used      = irecv[0];
1696     info->nz_allocated = irecv[1];
1697     info->nz_unneeded  = irecv[2];
1698     info->memory       = irecv[3];
1699     info->mallocs      = irecv[4];
1700   } else if (flag == MAT_GLOBAL_SUM) {
1701     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1702 
1703     info->nz_used      = irecv[0];
1704     info->nz_allocated = irecv[1];
1705     info->nz_unneeded  = irecv[2];
1706     info->memory       = irecv[3];
1707     info->mallocs      = irecv[4];
1708   }
1709   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1710   info->fill_ratio_needed = 0;
1711   info->factor_mallocs    = 0;
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatSetOption_MPIAIJ"
1717 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1718 {
1719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1720   PetscErrorCode ierr;
1721 
1722   PetscFunctionBegin;
1723   switch (op) {
1724   case MAT_NEW_NONZERO_LOCATIONS:
1725   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1726   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1727   case MAT_KEEP_NONZERO_PATTERN:
1728   case MAT_NEW_NONZERO_LOCATION_ERR:
1729   case MAT_USE_INODES:
1730   case MAT_IGNORE_ZERO_ENTRIES:
1731     MatCheckPreallocated(A,1);
1732     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1733     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1734     break;
1735   case MAT_ROW_ORIENTED:
1736     MatCheckPreallocated(A,1);
1737     a->roworiented = flg;
1738 
1739     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1740     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1741     break;
1742   case MAT_NEW_DIAGONALS:
1743     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1744     break;
1745   case MAT_IGNORE_OFF_PROC_ENTRIES:
1746     a->donotstash = flg;
1747     break;
1748   case MAT_SPD:
1749     A->spd_set = PETSC_TRUE;
1750     A->spd     = flg;
1751     if (flg) {
1752       A->symmetric                  = PETSC_TRUE;
1753       A->structurally_symmetric     = PETSC_TRUE;
1754       A->symmetric_set              = PETSC_TRUE;
1755       A->structurally_symmetric_set = PETSC_TRUE;
1756     }
1757     break;
1758   case MAT_SYMMETRIC:
1759     MatCheckPreallocated(A,1);
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_STRUCTURALLY_SYMMETRIC:
1763     MatCheckPreallocated(A,1);
1764     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1765     break;
1766   case MAT_HERMITIAN:
1767     MatCheckPreallocated(A,1);
1768     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_SYMMETRY_ETERNAL:
1771     MatCheckPreallocated(A,1);
1772     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1773     break;
1774   default:
1775     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1776   }
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatGetRow_MPIAIJ"
1782 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1783 {
1784   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1785   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1786   PetscErrorCode ierr;
1787   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1788   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1789   PetscInt       *cmap,*idx_p;
1790 
1791   PetscFunctionBegin;
1792   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1793   mat->getrowactive = PETSC_TRUE;
1794 
1795   if (!mat->rowvalues && (idx || v)) {
1796     /*
1797         allocate enough space to hold information from the longest row.
1798     */
1799     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1800     PetscInt   max = 1,tmp;
1801     for (i=0; i<matin->rmap->n; i++) {
1802       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1803       if (max < tmp) max = tmp;
1804     }
1805     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1806   }
1807 
1808   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1809   lrow = row - rstart;
1810 
1811   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1812   if (!v)   {pvA = 0; pvB = 0;}
1813   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1814   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1815   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1816   nztot = nzA + nzB;
1817 
1818   cmap = mat->garray;
1819   if (v  || idx) {
1820     if (nztot) {
1821       /* Sort by increasing column numbers, assuming A and B already sorted */
1822       PetscInt imark = -1;
1823       if (v) {
1824         *v = v_p = mat->rowvalues;
1825         for (i=0; i<nzB; i++) {
1826           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1827           else break;
1828         }
1829         imark = i;
1830         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1831         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1832       }
1833       if (idx) {
1834         *idx = idx_p = mat->rowindices;
1835         if (imark > -1) {
1836           for (i=0; i<imark; i++) {
1837             idx_p[i] = cmap[cworkB[i]];
1838           }
1839         } else {
1840           for (i=0; i<nzB; i++) {
1841             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1842             else break;
1843           }
1844           imark = i;
1845         }
1846         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1847         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1848       }
1849     } else {
1850       if (idx) *idx = 0;
1851       if (v)   *v   = 0;
1852     }
1853   }
1854   *nz  = nztot;
1855   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1856   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1862 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1863 {
1864   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1865 
1866   PetscFunctionBegin;
1867   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1868   aij->getrowactive = PETSC_FALSE;
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatNorm_MPIAIJ"
1874 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1875 {
1876   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1877   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1878   PetscErrorCode ierr;
1879   PetscInt       i,j,cstart = mat->cmap->rstart;
1880   PetscReal      sum = 0.0;
1881   MatScalar      *v;
1882 
1883   PetscFunctionBegin;
1884   if (aij->size == 1) {
1885     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1886   } else {
1887     if (type == NORM_FROBENIUS) {
1888       v = amat->a;
1889       for (i=0; i<amat->nz; i++) {
1890         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1891       }
1892       v = bmat->a;
1893       for (i=0; i<bmat->nz; i++) {
1894         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1895       }
1896       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1897       *norm = PetscSqrtReal(*norm);
1898       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1899     } else if (type == NORM_1) { /* max column norm */
1900       PetscReal *tmp,*tmp2;
1901       PetscInt  *jj,*garray = aij->garray;
1902       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1903       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1904       *norm = 0.0;
1905       v     = amat->a; jj = amat->j;
1906       for (j=0; j<amat->nz; j++) {
1907         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1908       }
1909       v = bmat->a; jj = bmat->j;
1910       for (j=0; j<bmat->nz; j++) {
1911         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1912       }
1913       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1914       for (j=0; j<mat->cmap->N; j++) {
1915         if (tmp2[j] > *norm) *norm = tmp2[j];
1916       }
1917       ierr = PetscFree(tmp);CHKERRQ(ierr);
1918       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1919       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1920     } else if (type == NORM_INFINITY) { /* max row norm */
1921       PetscReal ntemp = 0.0;
1922       for (j=0; j<aij->A->rmap->n; j++) {
1923         v   = amat->a + amat->i[j];
1924         sum = 0.0;
1925         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1926           sum += PetscAbsScalar(*v); v++;
1927         }
1928         v = bmat->a + bmat->i[j];
1929         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1930           sum += PetscAbsScalar(*v); v++;
1931         }
1932         if (sum > ntemp) ntemp = sum;
1933       }
1934       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1935       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1936     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1937   }
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatTranspose_MPIAIJ"
1943 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1944 {
1945   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1946   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1947   PetscErrorCode ierr;
1948   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1949   PetscInt       cstart = A->cmap->rstart,ncol;
1950   Mat            B;
1951   MatScalar      *array;
1952 
1953   PetscFunctionBegin;
1954   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1955 
1956   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1957   ai = Aloc->i; aj = Aloc->j;
1958   bi = Bloc->i; bj = Bloc->j;
1959   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1960     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1961     PetscSFNode          *oloc;
1962     PETSC_UNUSED PetscSF sf;
1963 
1964     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1965     /* compute d_nnz for preallocation */
1966     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1967     for (i=0; i<ai[ma]; i++) {
1968       d_nnz[aj[i]]++;
1969       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1970     }
1971     /* compute local off-diagonal contributions */
1972     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1973     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1974     /* map those to global */
1975     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1976     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1977     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1978     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1979     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1980     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1981     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1982 
1983     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1984     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1985     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1986     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1987     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1988     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1989   } else {
1990     B    = *matout;
1991     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1992     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1993   }
1994 
1995   /* copy over the A part */
1996   array = Aloc->a;
1997   row   = A->rmap->rstart;
1998   for (i=0; i<ma; i++) {
1999     ncol = ai[i+1]-ai[i];
2000     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2001     row++;
2002     array += ncol; aj += ncol;
2003   }
2004   aj = Aloc->j;
2005   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2006 
2007   /* copy over the B part */
2008   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2009   array = Bloc->a;
2010   row   = A->rmap->rstart;
2011   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2012   cols_tmp = cols;
2013   for (i=0; i<mb; i++) {
2014     ncol = bi[i+1]-bi[i];
2015     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2016     row++;
2017     array += ncol; cols_tmp += ncol;
2018   }
2019   ierr = PetscFree(cols);CHKERRQ(ierr);
2020 
2021   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2022   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2024     *matout = B;
2025   } else {
2026     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2027   }
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2033 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2034 {
2035   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2036   Mat            a    = aij->A,b = aij->B;
2037   PetscErrorCode ierr;
2038   PetscInt       s1,s2,s3;
2039 
2040   PetscFunctionBegin;
2041   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2042   if (rr) {
2043     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2044     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2045     /* Overlap communication with computation. */
2046     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2047   }
2048   if (ll) {
2049     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2050     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2051     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2052   }
2053   /* scale the diagonal block */
2054   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2055 
2056   if (rr) {
2057     /* Do a scatter end and then right scale the off-diagonal block */
2058     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2059     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2066 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2067 {
2068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2069   PetscErrorCode ierr;
2070 
2071   PetscFunctionBegin;
2072   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatEqual_MPIAIJ"
2078 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2079 {
2080   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2081   Mat            a,b,c,d;
2082   PetscBool      flg;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   a = matA->A; b = matA->B;
2087   c = matB->A; d = matB->B;
2088 
2089   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2090   if (flg) {
2091     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2092   }
2093   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatCopy_MPIAIJ"
2099 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2100 {
2101   PetscErrorCode ierr;
2102   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2103   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2104 
2105   PetscFunctionBegin;
2106   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2107   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2108     /* because of the column compression in the off-processor part of the matrix a->B,
2109        the number of columns in a->B and b->B may be different, hence we cannot call
2110        the MatCopy() directly on the two parts. If need be, we can provide a more
2111        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2112        then copying the submatrices */
2113     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2114   } else {
2115     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2116     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2117   }
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatSetUp_MPIAIJ"
2123 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2124 {
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 /*
2133    Computes the number of nonzeros per row needed for preallocation when X and Y
2134    have different nonzero structure.
2135 */
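/*
   Illustrative example (not from the source): if row i of X has global columns {0,3,7}
   and row i of Y has {3,5}, the merged pattern is {0,3,5,7}, so nnz[i] = 4.  The index k
   below walks Y's sorted row once while j walks X's sorted row, counting the union.
*/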
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2138 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2139 {
2140   PetscInt       i,j,k,nzx,nzy;
2141 
2142   PetscFunctionBegin;
2143   /* Set the number of nonzeros in the new matrix */
2144   for (i=0; i<m; i++) {
2145     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2146     nzx = xi[i+1] - xi[i];
2147     nzy = yi[i+1] - yi[i];
2148     nnz[i] = 0;
2149     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2150       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2151       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2152       nnz[i]++;
2153     }
2154     for (; k<nzy; k++) nnz[i]++;
2155   }
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2162 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2163 {
2164   PetscErrorCode ierr;
2165   PetscInt       m = Y->rmap->N;
2166   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2167   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2168 
2169   PetscFunctionBegin;
2170   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatAXPY_MPIAIJ"
2176 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2177 {
2178   PetscErrorCode ierr;
2179   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2180   PetscBLASInt   bnz,one=1;
2181   Mat_SeqAIJ     *x,*y;
2182 
2183   PetscFunctionBegin;
2184   if (str == SAME_NONZERO_PATTERN) {
2185     PetscScalar alpha = a;
2186     x    = (Mat_SeqAIJ*)xx->A->data;
2187     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2188     y    = (Mat_SeqAIJ*)yy->A->data;
2189     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2190     x    = (Mat_SeqAIJ*)xx->B->data;
2191     y    = (Mat_SeqAIJ*)yy->B->data;
2192     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2193     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2194     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2195   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2196     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2197   } else {
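    /* Different nonzero patterns: preallocate a new matrix with the union pattern of X and Y,
       perform the AXPY into it, then replace Y's internal data with it via MatHeaderReplace() */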
2198     Mat      B;
2199     PetscInt *nnz_d,*nnz_o;
2200     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2201     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2202     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2203     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2204     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2205     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2206     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2207     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2208     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2209     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2210     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2211     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2212     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2213     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2214   }
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatConjugate_MPIAIJ"
2222 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2223 {
2224 #if defined(PETSC_USE_COMPLEX)
2225   PetscErrorCode ierr;
2226   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2227 
2228   PetscFunctionBegin;
2229   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2230   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2231 #else
2232   PetscFunctionBegin;
2233 #endif
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatRealPart_MPIAIJ"
2239 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2240 {
2241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2242   PetscErrorCode ierr;
2243 
2244   PetscFunctionBegin;
2245   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2246   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2252 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2253 {
2254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2255   PetscErrorCode ierr;
2256 
2257   PetscFunctionBegin;
2258   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2259   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2265 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269   PetscInt       i,*idxb = 0;
2270   PetscScalar    *va,*vb;
2271   Vec            vtmp;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2275   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2276   if (idx) {
2277     for (i=0; i<A->rmap->n; i++) {
2278       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2279     }
2280   }
2281 
2282   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2283   if (idx) {
2284     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2285   }
2286   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2287   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2288 
2289   for (i=0; i<A->rmap->n; i++) {
2290     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2291       va[i] = vb[i];
2292       if (idx) idx[i] = a->garray[idxb[i]];
2293     }
2294   }
2295 
2296   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2297   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2298   ierr = PetscFree(idxb);CHKERRQ(ierr);
2299   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2305 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2306 {
2307   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2308   PetscErrorCode ierr;
2309   PetscInt       i,*idxb = 0;
2310   PetscScalar    *va,*vb;
2311   Vec            vtmp;
2312 
2313   PetscFunctionBegin;
2314   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2315   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2316   if (idx) {
2317     for (i=0; i<A->rmap->n; i++) {
2318       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2319     }
2320   }
2321 
2322   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2323   if (idx) {
2324     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2325   }
2326   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2327   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2328 
2329   for (i=0; i<A->rmap->n; i++) {
2330     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2331       va[i] = vb[i];
2332       if (idx) idx[i] = a->garray[idxb[i]];
2333     }
2334   }
2335 
2336   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2337   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2338   ierr = PetscFree(idxb);CHKERRQ(ierr);
2339   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2345 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2346 {
2347   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2348   PetscInt       n      = A->rmap->n;
2349   PetscInt       cstart = A->cmap->rstart;
2350   PetscInt       *cmap  = mat->garray;
2351   PetscInt       *diagIdx, *offdiagIdx;
2352   Vec            diagV, offdiagV;
2353   PetscScalar    *a, *diagA, *offdiagA;
2354   PetscInt       r;
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2359   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2360   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2361   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2362   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2364   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2365   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2366   for (r = 0; r < n; ++r) {
2367     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2368       a[r]   = diagA[r];
2369       idx[r] = cstart + diagIdx[r];
2370     } else {
2371       a[r]   = offdiagA[r];
2372       idx[r] = cmap[offdiagIdx[r]];
2373     }
2374   }
2375   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2378   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2379   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2380   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2386 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2387 {
2388   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2389   PetscInt       n      = A->rmap->n;
2390   PetscInt       cstart = A->cmap->rstart;
2391   PetscInt       *cmap  = mat->garray;
2392   PetscInt       *diagIdx, *offdiagIdx;
2393   Vec            diagV, offdiagV;
2394   PetscScalar    *a, *diagA, *offdiagA;
2395   PetscInt       r;
2396   PetscErrorCode ierr;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2400   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2402   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2403   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2404   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2405   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2406   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   for (r = 0; r < n; ++r) {
2408     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2409       a[r]   = diagA[r];
2410       idx[r] = cstart + diagIdx[r];
2411     } else {
2412       a[r]   = offdiagA[r];
2413       idx[r] = cmap[offdiagIdx[r]];
2414     }
2415   }
2416   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2417   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2420   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2421   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2427 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2428 {
2429   PetscErrorCode ierr;
2430   Mat            *dummy;
2431 
2432   PetscFunctionBegin;
2433   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2434   *newmat = *dummy;
2435   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2441 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2442 {
2443   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2444   PetscErrorCode ierr;
2445 
2446   PetscFunctionBegin;
2447   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2448   A->errortype = a->A->errortype;
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2454 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2455 {
2456   PetscErrorCode ierr;
2457   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2458 
2459   PetscFunctionBegin;
2460   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2461   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2462   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2463   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2469 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2470 {
2471   PetscFunctionBegin;
2472   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2473   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2479 /*@
2480    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2481 
2482    Collective on Mat
2483 
2484    Input Parameters:
2485 +    A - the matrix
2486 -    sc - PETSC_TRUE indicates the scalable algorithm should be used (the default is not to use it)
2487 
2488    Level: advanced
2489 
2490 @*/
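/*
   A minimal usage sketch (illustrative only, not part of this source file); it assumes 'A' is an
   assembled MATMPIAIJ matrix and 'n','is' are the index sets whose overlap is to be increased:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,n,is,1);CHKERRQ(ierr);
*/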
2491 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2492 {
2493   PetscErrorCode       ierr;
2494 
2495   PetscFunctionBegin;
2496   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2502 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2503 {
2504   PetscErrorCode       ierr;
2505   PetscBool            sc = PETSC_FALSE,flg;
2506 
2507   PetscFunctionBegin;
2508   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2509   ierr = PetscObjectOptionsBegin((PetscObject)A);
2510     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2511     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2512     if (flg) {
2513       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2514     }
2515   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatShift_MPIAIJ"
2521 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2522 {
2523   PetscErrorCode ierr;
2524   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2525   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2526 
2527   PetscFunctionBegin;
2528   if (!Y->preallocated) {
2529     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2530   } else if (!aij->nz) {
2531     PetscInt nonew = aij->nonew;
2532     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2533     aij->nonew = nonew;
2534   }
2535   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2541 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2542 {
2543   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2544   PetscErrorCode ierr;
2545 
2546   PetscFunctionBegin;
2547   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2548   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2549   if (d) {
2550     PetscInt rstart;
2551     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2552     *d += rstart;
2553 
2554   }
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 
2559 /* -------------------------------------------------------------------*/
2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2561                                        MatGetRow_MPIAIJ,
2562                                        MatRestoreRow_MPIAIJ,
2563                                        MatMult_MPIAIJ,
2564                                 /* 4*/ MatMultAdd_MPIAIJ,
2565                                        MatMultTranspose_MPIAIJ,
2566                                        MatMultTransposeAdd_MPIAIJ,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                 /*10*/ 0,
2571                                        0,
2572                                        0,
2573                                        MatSOR_MPIAIJ,
2574                                        MatTranspose_MPIAIJ,
2575                                 /*15*/ MatGetInfo_MPIAIJ,
2576                                        MatEqual_MPIAIJ,
2577                                        MatGetDiagonal_MPIAIJ,
2578                                        MatDiagonalScale_MPIAIJ,
2579                                        MatNorm_MPIAIJ,
2580                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2581                                        MatAssemblyEnd_MPIAIJ,
2582                                        MatSetOption_MPIAIJ,
2583                                        MatZeroEntries_MPIAIJ,
2584                                 /*24*/ MatZeroRows_MPIAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*29*/ MatSetUp_MPIAIJ,
2590                                        0,
2591                                        0,
2592                                        MatGetDiagonalBlock_MPIAIJ,
2593                                        0,
2594                                 /*34*/ MatDuplicate_MPIAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*39*/ MatAXPY_MPIAIJ,
2600                                        MatGetSubMatrices_MPIAIJ,
2601                                        MatIncreaseOverlap_MPIAIJ,
2602                                        MatGetValues_MPIAIJ,
2603                                        MatCopy_MPIAIJ,
2604                                 /*44*/ MatGetRowMax_MPIAIJ,
2605                                        MatScale_MPIAIJ,
2606                                        MatShift_MPIAIJ,
2607                                        MatDiagonalSet_MPIAIJ,
2608                                        MatZeroRowsColumns_MPIAIJ,
2609                                 /*49*/ MatSetRandom_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2615                                        0,
2616                                        MatSetUnfactored_MPIAIJ,
2617                                        MatPermute_MPIAIJ,
2618                                        0,
2619                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2620                                        MatDestroy_MPIAIJ,
2621                                        MatView_MPIAIJ,
2622                                        0,
2623                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2624                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2625                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2630                                        MatGetRowMinAbs_MPIAIJ,
2631                                        0,
2632                                        MatSetColoring_MPIAIJ,
2633                                        0,
2634                                        MatSetValuesAdifor_MPIAIJ,
2635                                 /*75*/ MatFDColoringApply_AIJ,
2636                                        MatSetFromOptions_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        MatFindZeroDiagonals_MPIAIJ,
2640                                 /*80*/ 0,
2641                                        0,
2642                                        0,
2643                                 /*83*/ MatLoad_MPIAIJ,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2650                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2651                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2652                                        MatPtAP_MPIAIJ_MPIAIJ,
2653                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2654                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                 /*99*/ 0,
2660                                        0,
2661                                        0,
2662                                        MatConjugate_MPIAIJ,
2663                                        0,
2664                                 /*104*/MatSetValuesRow_MPIAIJ,
2665                                        MatRealPart_MPIAIJ,
2666                                        MatImaginaryPart_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                 /*109*/0,
2670                                        0,
2671                                        MatGetRowMin_MPIAIJ,
2672                                        0,
2673                                        MatMissingDiagonal_MPIAIJ,
2674                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2675                                        0,
2676                                        MatGetGhosts_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                 /*119*/0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatGetMultiProcBlock_MPIAIJ,
2684                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2685                                        MatGetColumnNorms_MPIAIJ,
2686                                        MatInvertBlockDiagonal_MPIAIJ,
2687                                        0,
2688                                        MatGetSubMatricesMPI_MPIAIJ,
2689                                 /*129*/0,
2690                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2691                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2692                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2693                                        0,
2694                                 /*134*/0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                 /*139*/0,
2700                                        0,
2701                                        0,
2702                                        MatFDColoringSetUp_MPIXAIJ,
2703                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2704                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2705 };
2706 
2707 /* ----------------------------------------------------------------------------------------*/
2708 
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2711 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2712 {
2713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2718   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2724 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2725 {
2726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2731   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 #undef __FUNCT__
2736 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2737 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2738 {
2739   Mat_MPIAIJ     *b;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2744   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2745   b = (Mat_MPIAIJ*)B->data;
2746 
2747   if (!B->preallocated) {
2748     /* Explicitly create 2 MATSEQAIJ matrices. */
2749     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2750     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2751     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2752     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2753     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2754     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2755     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2756     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2757     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2758     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2759   }
2760 
2761   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2762   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2763   B->preallocated = PETSC_TRUE;
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 #undef __FUNCT__
2768 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2769 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2770 {
2771   Mat            mat;
2772   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2773   PetscErrorCode ierr;
2774 
2775   PetscFunctionBegin;
2776   *newmat = 0;
2777   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2778   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2779   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2780   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2781   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2782   a       = (Mat_MPIAIJ*)mat->data;
2783 
2784   mat->factortype   = matin->factortype;
2785   mat->assembled    = PETSC_TRUE;
2786   mat->insertmode   = NOT_SET_VALUES;
2787   mat->preallocated = PETSC_TRUE;
2788 
2789   a->size         = oldmat->size;
2790   a->rank         = oldmat->rank;
2791   a->donotstash   = oldmat->donotstash;
2792   a->roworiented  = oldmat->roworiented;
2793   a->rowindices   = 0;
2794   a->rowvalues    = 0;
2795   a->getrowactive = PETSC_FALSE;
2796 
2797   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2798   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2799 
2800   if (oldmat->colmap) {
2801 #if defined(PETSC_USE_CTABLE)
2802     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2803 #else
2804     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2805     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2806     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2807 #endif
2808   } else a->colmap = 0;
2809   if (oldmat->garray) {
2810     PetscInt len;
2811     len  = oldmat->B->cmap->n;
2812     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2813     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2814     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2815   } else a->garray = 0;
2816 
2817   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2818   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2819   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2820   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2821   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2822   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2823   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2824   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2825   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2826   *newmat = mat;
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 
2831 
2832 #undef __FUNCT__
2833 #define __FUNCT__ "MatLoad_MPIAIJ"
2834 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2835 {
2836   PetscScalar    *vals,*svals;
2837   MPI_Comm       comm;
2838   PetscErrorCode ierr;
2839   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2840   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2841   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2842   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2843   PetscInt       cend,cstart,n,*rowners;
2844   int            fd;
2845   PetscInt       bs = newMat->rmap->bs;
2846 
2847   PetscFunctionBegin;
2848   /* force binary viewer to load .info file if it has not yet done so */
2849   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2850   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2851   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2852   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2853   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2854   if (!rank) {
2855     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2856     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2857     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2858   }
2859 
2860   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2861   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2862   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2863   if (bs < 0) bs = 1;
2864 
2865   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2866   M    = header[1]; N = header[2];
2867 
2868   /* If global sizes are set, check if they are consistent with that given in the file */
2869   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2870   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2871 
2872   /* determine ownership of all (block) rows */
2873   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2874   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2875   else m = newMat->rmap->n; /* Set by user */
2876 
2877   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2878   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2879 
2880   /* First process needs enough room for process with most rows */
2881   if (!rank) {
2882     mmax = rowners[1];
2883     for (i=2; i<=size; i++) {
2884       mmax = PetscMax(mmax, rowners[i]);
2885     }
2886   } else mmax = -1;             /* unused, but compilers complain */
2887 
2888   rowners[0] = 0;
2889   for (i=2; i<=size; i++) {
2890     rowners[i] += rowners[i-1];
2891   }
2892   rstart = rowners[rank];
2893   rend   = rowners[rank+1];
2894 
2895   /* distribute row lengths to all processors */
2896   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2897   if (!rank) {
2898     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2899     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2900     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2901     for (j=0; j<m; j++) {
2902       procsnz[0] += ourlens[j];
2903     }
2904     for (i=1; i<size; i++) {
2905       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2906       /* calculate the number of nonzeros on each processor */
2907       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2908         procsnz[i] += rowlengths[j];
2909       }
2910       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2911     }
2912     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2913   } else {
2914     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2915   }
2916 
2917   if (!rank) {
2918     /* determine max buffer needed and allocate it */
2919     maxnz = 0;
2920     for (i=0; i<size; i++) {
2921       maxnz = PetscMax(maxnz,procsnz[i]);
2922     }
2923     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2924 
2925     /* read in my part of the matrix column indices  */
2926     nz   = procsnz[0];
2927     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2928     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2929 
2930     /* read in everyone else's and ship them off */
2931     for (i=1; i<size; i++) {
2932       nz   = procsnz[i];
2933       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2934       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2935     }
2936     ierr = PetscFree(cols);CHKERRQ(ierr);
2937   } else {
2938     /* determine buffer space needed for message */
2939     nz = 0;
2940     for (i=0; i<m; i++) {
2941       nz += ourlens[i];
2942     }
2943     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2944 
2945     /* receive message of column indices*/
2946     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2947   }
2948 
2949   /* determine column ownership if matrix is not square */
2950   if (N != M) {
2951     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2952     else n = newMat->cmap->n;
2953     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2954     cstart = cend - n;
2955   } else {
2956     cstart = rstart;
2957     cend   = rend;
2958     n      = cend - cstart;
2959   }
2960 
2961   /* loop over local rows, determining number of off diagonal entries */
2962   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2963   jj   = 0;
2964   for (i=0; i<m; i++) {
2965     for (j=0; j<ourlens[i]; j++) {
2966       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2967       jj++;
2968     }
2969   }
2970 
2971   for (i=0; i<m; i++) {
2972     ourlens[i] -= offlens[i];
2973   }
2974   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2975 
2976   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2977 
2978   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2979 
2980   for (i=0; i<m; i++) {
2981     ourlens[i] += offlens[i];
2982   }
2983 
2984   if (!rank) {
2985     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2986 
2987     /* read in my part of the matrix numerical values  */
2988     nz   = procsnz[0];
2989     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2990 
2991     /* insert into matrix */
2992     jj      = rstart;
2993     smycols = mycols;
2994     svals   = vals;
2995     for (i=0; i<m; i++) {
2996       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2997       smycols += ourlens[i];
2998       svals   += ourlens[i];
2999       jj++;
3000     }
3001 
3002     /* read in other processors and ship out */
3003     for (i=1; i<size; i++) {
3004       nz   = procsnz[i];
3005       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3006       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3007     }
3008     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3009   } else {
3010     /* receive numeric values */
3011     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3012 
3013     /* receive message of values*/
3014     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3015 
3016     /* insert into matrix */
3017     jj      = rstart;
3018     smycols = mycols;
3019     svals   = vals;
3020     for (i=0; i<m; i++) {
3021       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3022       smycols += ourlens[i];
3023       svals   += ourlens[i];
3024       jj++;
3025     }
3026   }
3027   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3028   ierr = PetscFree(vals);CHKERRQ(ierr);
3029   ierr = PetscFree(mycols);CHKERRQ(ierr);
3030   ierr = PetscFree(rowners);CHKERRQ(ierr);
3031   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3032   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 #undef __FUNCT__
3037 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3038 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3039 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3040 {
3041   PetscErrorCode ierr;
3042   IS             iscol_local;
3043   PetscInt       csize;
3044 
3045   PetscFunctionBegin;
3046   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3047   if (call == MAT_REUSE_MATRIX) {
3048     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3049     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3050   } else {
3051     /* check if we are grabbing all columns*/
3052     PetscBool    isstride;
3053     PetscMPIInt  lisstride = 0,gisstride;
3054     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3055     if (isstride) {
3056       PetscInt  start,len,mstart,mlen;
3057       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3058       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3059       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3060       if (mstart == start && mlen-mstart == len) lisstride = 1;
3061     }
3062     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3063     if (gisstride) {
3064       PetscInt N;
3065       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3066       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3067       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3068       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3069     } else {
3070       PetscInt cbs;
3071       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3072       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3073       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3074     }
3075   }
3076   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3077   if (call == MAT_INITIAL_MATRIX) {
3078     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3079     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3080   }
3081   PetscFunctionReturn(0);
3082 }
3083 
3084 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3085 #undef __FUNCT__
3086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3087 /*
3088     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3089   on each process, and then the final result by concatenating the local matrices.
3090   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3091 
3092   Note: This requires a sequential iscol with all indices.
3093 */
3094 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3095 {
3096   PetscErrorCode ierr;
3097   PetscMPIInt    rank,size;
3098   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3099   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3100   PetscBool      allcolumns, colflag;
3101   Mat            M,Mreuse;
3102   MatScalar      *vwork,*aa;
3103   MPI_Comm       comm;
3104   Mat_SeqAIJ     *aij;
3105 
3106   PetscFunctionBegin;
3107   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3108   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3110 
3111   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3112   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3113   if (colflag && ncol == mat->cmap->N) {
3114     allcolumns = PETSC_TRUE;
3115     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3116   } else {
3117     allcolumns = PETSC_FALSE;
3118   }
3119   if (call ==  MAT_REUSE_MATRIX) {
3120     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3121     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3122     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3123   } else {
3124     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3125   }
3126 
3127   /*
3128       m - number of local rows
3129       n - number of columns (same on all processors)
3130       rstart - first row in new global matrix generated
3131   */
3132   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3133   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3134   if (call == MAT_INITIAL_MATRIX) {
3135     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3136     ii  = aij->i;
3137     jj  = aij->j;
3138 
3139     /*
3140         Determine the number of non-zeros in the diagonal and off-diagonal
3141         portions of the matrix in order to do correct preallocation
3142     */
3143 
3144     /* first get start and end of "diagonal" columns */
3145     if (csize == PETSC_DECIDE) {
3146       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3147       if (mglobal == n) { /* square matrix */
3148         nlocal = m;
3149       } else {
3150         nlocal = n/size + ((n % size) > rank);
3151       }
3152     } else {
3153       nlocal = csize;
3154     }
3155     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3156     rstart = rend - nlocal;
3157     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3158 
3159     /* next, compute all the lengths */
3160     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3161     olens = dlens + m;
3162     for (i=0; i<m; i++) {
3163       jend = ii[i+1] - ii[i];
3164       olen = 0;
3165       dlen = 0;
3166       for (j=0; j<jend; j++) {
3167         if (*jj < rstart || *jj >= rend) olen++;
3168         else dlen++;
3169         jj++;
3170       }
3171       olens[i] = olen;
3172       dlens[i] = dlen;
3173     }
3174     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3175     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3176     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3177     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3178     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3179     ierr = PetscFree(dlens);CHKERRQ(ierr);
3180   } else {
3181     PetscInt ml,nl;
3182 
3183     M    = *newmat;
3184     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3185     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3186     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3187     /*
3188          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3189        rather than the slower MatSetValues().
3190     */
3191     M->was_assembled = PETSC_TRUE;
3192     M->assembled     = PETSC_FALSE;
3193   }
3194   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3195   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3196   ii   = aij->i;
3197   jj   = aij->j;
3198   aa   = aij->a;
3199   for (i=0; i<m; i++) {
3200     row   = rstart + i;
3201     nz    = ii[i+1] - ii[i];
3202     cwork = jj;     jj += nz;
3203     vwork = aa;     aa += nz;
3204     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3205   }
3206 
3207   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3208   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3209   *newmat = M;
3210 
3211   /* save submatrix used in processor for next request */
3212   if (call ==  MAT_INITIAL_MATRIX) {
3213     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3214     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3215   }
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3221 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3222 {
3223   PetscInt       m,cstart, cend,j,nnz,i,d;
3224   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3225   const PetscInt *JJ;
3226   PetscScalar    *values;
3227   PetscErrorCode ierr;
3228 
3229   PetscFunctionBegin;
3230   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3231 
3232   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3233   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3234   m      = B->rmap->n;
3235   cstart = B->cmap->rstart;
3236   cend   = B->cmap->rend;
3237   rstart = B->rmap->rstart;
3238 
3239   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3240 
3241 #if defined(PETSC_USE_DEBUG)
3242   for (i=0; i<m; i++) {
3243     nnz = Ii[i+1]- Ii[i];
3244     JJ  = J + Ii[i];
3245     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3246     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3247     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3248   }
3249 #endif
3250 
3251   for (i=0; i<m; i++) {
3252     nnz     = Ii[i+1]- Ii[i];
3253     JJ      = J + Ii[i];
3254     nnz_max = PetscMax(nnz_max,nnz);
3255     d       = 0;
3256     for (j=0; j<nnz; j++) {
3257       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3258     }
3259     d_nnz[i] = d;
3260     o_nnz[i] = nnz - d;
3261   }
3262   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3263   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3264 
3265   if (v) values = (PetscScalar*)v;
3266   else {
3267     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3268   }
3269 
3270   for (i=0; i<m; i++) {
3271     ii   = i + rstart;
3272     nnz  = Ii[i+1]- Ii[i];
3273     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3274   }
3275   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3276   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3277 
3278   if (!v) {
3279     ierr = PetscFree(values);CHKERRQ(ierr);
3280   }
3281   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 #undef __FUNCT__
3286 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3287 /*@
3288    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3289    (the default parallel PETSc format).
3290 
3291    Collective on MPI_Comm
3292 
3293    Input Parameters:
3294 +  B - the matrix
3295 .  i - the indices into j for the start of each local row (starts with zero)
3296 .  j - the column indices for each local row (starts with zero)
3297 -  v - optional values in the matrix
3298 
3299    Level: developer
3300 
3301    Notes:
3302        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3303      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3304      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3305 
3306        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3307 
3308        The format used for the sparse matrix input is equivalent to a
3309     row-major ordering, i.e., for the following matrix the expected input data is
3310     as shown:
3311 
3312 $        1 0 0
3313 $        2 0 3     P0
3314 $       -------
3315 $        4 5 6     P1
3316 $
3317 $     Process0 [P0]: rows_owned=[0,1]
3318 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3319 $        j =  {0,0,2}  [size = 3]
3320 $        v =  {1,2,3}  [size = 3]
3321 $
3322 $     Process1 [P1]: rows_owned=[2]
3323 $        i =  {0,3}    [size = nrow+1  = 1+1]
3324 $        j =  {0,1,2}  [size = 3]
3325 $        v =  {4,5,6}  [size = 3]
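
     As a sketch (assuming here that B has already been created with MatCreate(), given its
     local sizes with MatSetSizes(), and set to type MATMPIAIJ), process P0 above would call
.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve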
3326 
3327 .keywords: matrix, aij, compressed row, sparse, parallel
3328 
3329 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3330           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3331 @*/
3332 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3333 {
3334   PetscErrorCode ierr;
3335 
3336   PetscFunctionBegin;
3337   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3343 /*@C
3344    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3345    (the default parallel PETSc format).  For good matrix assembly performance
3346    the user should preallocate the matrix storage by setting the parameters
3347    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3348    performance can be increased by more than a factor of 50.
3349 
3350    Collective on MPI_Comm
3351 
3352    Input Parameters:
3353 +  B - the matrix
3354 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3355            (same value is used for all local rows)
3356 .  d_nnz - array containing the number of nonzeros in the various rows of the
3357            DIAGONAL portion of the local submatrix (possibly different for each row)
3358            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3359            The size of this array is equal to the number of local rows, i.e 'm'.
3360            For matrices that will be factored, you must leave room for (and set)
3361            the diagonal entry even if it is zero.
3362 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3363            submatrix (same value is used for all local rows).
3364 -  o_nnz - array containing the number of nonzeros in the various rows of the
3365            OFF-DIAGONAL portion of the local submatrix (possibly different for
3366            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3367            structure. The size of this array is equal to the number
3368            of local rows, i.e 'm'.
3369 
3370    If the *_nnz parameter is given then the *_nz parameter is ignored
3371 
3372    The AIJ format (also called the Yale sparse matrix format or
3373    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3374    storage.  The stored row and column indices begin with zero.
3375    See Users-Manual: ch_mat for details.
3376 
3377    The parallel matrix is partitioned such that the first m0 rows belong to
3378    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3379    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3380 
3381    The DIAGONAL portion of the local submatrix of a processor can be defined
3382    as the submatrix obtained by extracting the part corresponding to
3383    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3384    first row that belongs to the processor, r2 is the last row belonging to
3385    this processor, and c1-c2 is the range of indices of the local part of a
3386    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3387    common case of a square matrix, the row and column ranges are the same and
3388    the DIAGONAL part is also square. The remaining portion of the local
3389    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3390 
3391    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3392 
3393    You can call MatGetInfo() to get information on how effective the preallocation was;
3394    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3395    You can also run with the option -info and look for messages with the string
3396    malloc in them to see if additional memory allocation was needed.
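
   For instance, a quick local check of the preallocation might look like (a sketch; B is the
   assembled matrix)
.vb
     MatInfo info;
     ierr = MatGetInfo(B,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                        info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);CHKERRQ(ierr);
.ve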
3397 
3398    Example usage:
3399 
3400    Consider the following 8x8 matrix with 34 non-zero values, that is
3401    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3402    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3403    as follows:
3404 
3405 .vb
3406             1  2  0  |  0  3  0  |  0  4
3407     Proc0   0  5  6  |  7  0  0  |  8  0
3408             9  0 10  | 11  0  0  | 12  0
3409     -------------------------------------
3410            13  0 14  | 15 16 17  |  0  0
3411     Proc1   0 18  0  | 19 20 21  |  0  0
3412             0  0  0  | 22 23  0  | 24  0
3413     -------------------------------------
3414     Proc2  25 26 27  |  0  0 28  | 29  0
3415            30  0  0  | 31 32 33  |  0 34
3416 .ve
3417 
3418    This can be represented as a collection of submatrices as:
3419 
3420 .vb
3421       A B C
3422       D E F
3423       G H I
3424 .ve
3425 
3426    Where the submatrices A,B,C are owned by proc0, D,E,F are
3427    owned by proc1, G,H,I are owned by proc2.
3428 
3429    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3430    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3431    The 'M','N' parameters are 8,8, and have the same values on all procs.
3432 
3433    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3434    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3435    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3436    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3437    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3438    matrix, and [DF] as another SeqAIJ matrix.
3439 
3440    When d_nz, o_nz parameters are specified, d_nz storage elements are
3441    allocated for every row of the local diagonal submatrix, and o_nz
3442    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3443    One way to choose d_nz and o_nz is to use the max nonzeros per local
3444    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3445    In this case, the values of d_nz,o_nz are:
3446 .vb
3447      proc0 : dnz = 2, o_nz = 2
3448      proc1 : dnz = 3, o_nz = 2
3449      proc2 : dnz = 1, o_nz = 4
3450 .ve
3451    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3452    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3453    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3454    34 values.
3455 
3456    When d_nnz, o_nnz parameters are specified, the storage is specified
3457    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3458    In the above case the values for d_nnz,o_nnz are:
3459 .vb
3460      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3461      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3462      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3463 .ve
3464    Here the space allocated is the sum of all the above values, i.e., 34, and
3465    hence pre-allocation is perfect.
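
   As a sketch, proc0 above would then make the call below (assuming B was created with
   MatCreate(), MatSetSizes() and MatSetType(B,MATMPIAIJ)); the other processes do the
   same with their own d_nnz and o_nnz arrays
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve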
3466 
3467    Level: intermediate
3468 
3469 .keywords: matrix, aij, compressed row, sparse, parallel
3470 
3471 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3472           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3473 @*/
3474 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3475 {
3476   PetscErrorCode ierr;
3477 
3478   PetscFunctionBegin;
3479   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3480   PetscValidType(B,1);
3481   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3482   PetscFunctionReturn(0);
3483 }
3484 
3485 #undef __FUNCT__
3486 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3487 /*@
3488      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3489          rows in standard CSR format.
3490 
3491    Collective on MPI_Comm
3492 
3493    Input Parameters:
3494 +  comm - MPI communicator
3495 .  m - number of local rows (Cannot be PETSC_DECIDE)
3496 .  n - This value should be the same as the local size used in creating the
3497        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3498        calculated if N is given) For square matrices n is almost always m.
3499 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3500 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3501 .   i - row indices
3502 .   j - column indices
3503 -   a - matrix values
3504 
3505    Output Parameter:
3506 .   mat - the matrix
3507 
3508    Level: intermediate
3509 
3510    Notes:
3511        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3512      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3513      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3514 
3515        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3516 
3517        The format used for the sparse matrix input is equivalent to a
3518     row-major ordering, i.e., for the following matrix the expected input data is
3519     as shown:
3520 
3521 $        1 0 0
3522 $        2 0 3     P0
3523 $       -------
3524 $        4 5 6     P1
3525 $
3526 $     Process0 [P0]: rows_owned=[0,1]
3527 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3528 $        j =  {0,0,2}  [size = 3]
3529 $        v =  {1,2,3}  [size = 3]
3530 $
3531 $     Process1 [P1]: rows_owned=[2]
3532 $        i =  {0,3}    [size = nrow+1  = 1+1]
3533 $        j =  {0,1,2}  [size = 3]
3534 $        v =  {4,5,6}  [size = 3]
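
     As a sketch, process P0 above would make the call below (with comm the communicator
     shared by P0 and P1)
.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve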
3535 
3536 .keywords: matrix, aij, compressed row, sparse, parallel
3537 
3538 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3539           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3540 @*/
3541 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3542 {
3543   PetscErrorCode ierr;
3544 
3545   PetscFunctionBegin;
3546   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3547   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3548   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3549   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3550   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3551   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3552   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3553   PetscFunctionReturn(0);
3554 }
3555 
3556 #undef __FUNCT__
3557 #define __FUNCT__ "MatCreateAIJ"
3558 /*@C
3559    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3560    (the default parallel PETSc format).  For good matrix assembly performance
3561    the user should preallocate the matrix storage by setting the parameters
3562    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3563    performance can be increased by more than a factor of 50.
3564 
3565    Collective on MPI_Comm
3566 
3567    Input Parameters:
3568 +  comm - MPI communicator
3569 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3570            This value should be the same as the local size used in creating the
3571            y vector for the matrix-vector product y = Ax.
3572 .  n - This value should be the same as the local size used in creating the
3573        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3574        calculated if N is given) For square matrices n is almost always m.
3575 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3576 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3577 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3578            (same value is used for all local rows)
3579 .  d_nnz - array containing the number of nonzeros in the various rows of the
3580            DIAGONAL portion of the local submatrix (possibly different for each row)
3581            or NULL, if d_nz is used to specify the nonzero structure.
3582            The size of this array is equal to the number of local rows, i.e 'm'.
3583 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3584            submatrix (same value is used for all local rows).
3585 -  o_nnz - array containing the number of nonzeros in the various rows of the
3586            OFF-DIAGONAL portion of the local submatrix (possibly different for
3587            each row) or NULL, if o_nz is used to specify the nonzero
3588            structure. The size of this array is equal to the number
3589            of local rows, i.e 'm'.
3590 
3591    Output Parameter:
3592 .  A - the matrix
3593 
3594    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3595    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3596    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3597 
3598    Notes:
3599    If the *_nnz parameter is given then the *_nz parameter is ignored
3600 
3601    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3602    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3603    storage requirements for this matrix.
3604 
3605    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3606    processor then it must be used on all processors that share the object for
3607    that argument.
3608 
3609    The user MUST specify either the local or global matrix dimensions
3610    (possibly both).
3611 
3612    The parallel matrix is partitioned across processors such that the
3613    first m0 rows belong to process 0, the next m1 rows belong to
3614    process 1, the next m2 rows belong to process 2 etc.. where
3615    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3616    values corresponding to an [m x N] submatrix.
3617 
3618    The columns are logically partitioned with the n0 columns belonging
3619    to 0th partition, the next n1 columns belonging to the next
3620    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3621 
3622    The DIAGONAL portion of the local submatrix on any given processor
3623    is the submatrix corresponding to the rows and columns m,n
3624    owned by the given processor, i.e., the diagonal matrix on
3625    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3626    etc. The remaining portion of the local submatrix [m x (N-n)]
3627    constitute the OFF-DIAGONAL portion. The example below better
3628    illustrates this concept.
3629 
3630    For a square global matrix we define each processor's diagonal portion
3631    to be its local rows and the corresponding columns (a square submatrix);
3632    each processor's off-diagonal portion encompasses the remainder of the
3633    local matrix (a rectangular submatrix).
3634 
3635    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3636 
3637    When calling this routine with a single process communicator, a matrix of
3638    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3639    type of communicator, use the construction mechanism:
3640      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3641 
3642    By default, this format uses inodes (identical nodes) when possible.
3643    We search for consecutive rows with the same nonzero structure, thereby
3644    reusing matrix information to achieve increased efficiency.
3645 
3646    Options Database Keys:
3647 +  -mat_no_inode  - Do not use inodes
3648 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3649 -  -mat_aij_oneindex - Internally use indexing starting at 1
3650         rather than 0.  Note that when calling MatSetValues(),
3651         the user still MUST index entries starting at 0!
3652 
3653 
3654    Example usage:
3655 
3656    Consider the following 8x8 matrix with 34 non-zero values, that is
3657    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3658    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3659    as follows:
3660 
3661 .vb
3662             1  2  0  |  0  3  0  |  0  4
3663     Proc0   0  5  6  |  7  0  0  |  8  0
3664             9  0 10  | 11  0  0  | 12  0
3665     -------------------------------------
3666            13  0 14  | 15 16 17  |  0  0
3667     Proc1   0 18  0  | 19 20 21  |  0  0
3668             0  0  0  | 22 23  0  | 24  0
3669     -------------------------------------
3670     Proc2  25 26 27  |  0  0 28  | 29  0
3671            30  0  0  | 31 32 33  |  0 34
3672 .ve
3673 
3674    This can be represented as a collection of submatrices as:
3675 
3676 .vb
3677       A B C
3678       D E F
3679       G H I
3680 .ve
3681 
3682    Where the submatrices A,B,C are owned by proc0, D,E,F are
3683    owned by proc1, G,H,I are owned by proc2.
3684 
3685    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3686    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3687    The 'M','N' parameters are 8,8, and have the same values on all procs.
3688 
3689    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3690    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3691    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3692    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3693    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3694    matrix, and [DF] as another SeqAIJ matrix.
3695 
3696    When d_nz, o_nz parameters are specified, d_nz storage elements are
3697    allocated for every row of the local diagonal submatrix, and o_nz
3698    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3699    One way to choose d_nz and o_nz is to use the max nonzeros per local
3700    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3701    In this case, the values of d_nz,o_nz are:
3702 .vb
3703      proc0 : dnz = 2, o_nz = 2
3704      proc1 : dnz = 3, o_nz = 2
3705      proc2 : dnz = 1, o_nz = 4
3706 .ve
3707    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3708    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3709    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3710    34 values.
3711 
3712    When d_nnz, o_nnz parameters are specified, the storage is specified
3713    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3714    In the above case the values for d_nnz,o_nnz are:
3715 .vb
3716      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3717      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3718      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3719 .ve
3720    Here the space allocated is the sum of all the above values, i.e., 34, and
3721    hence pre-allocation is perfect.
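
   As a sketch, proc0 above would make the call below (the other processes make the same
   call with their own m, n, d_nnz and o_nnz)
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     Mat      A;
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve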
3722 
3723    Level: intermediate
3724 
3725 .keywords: matrix, aij, compressed row, sparse, parallel
3726 
3727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3728           MPIAIJ, MatCreateMPIAIJWithArrays()
3729 @*/
3730 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3731 {
3732   PetscErrorCode ierr;
3733   PetscMPIInt    size;
3734 
3735   PetscFunctionBegin;
3736   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3737   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3738   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3739   if (size > 1) {
3740     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3741     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3742   } else {
3743     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3744     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3745   }
3746   PetscFunctionReturn(0);
3747 }
3748 
3749 #undef __FUNCT__
3750 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3751 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3752 {
3753   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3754   PetscBool      flg;
3755   PetscErrorCode ierr;
3756 
3757   PetscFunctionBegin;
3758   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3759   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires an MPIAIJ matrix as input");
3760   if (Ad)     *Ad     = a->A;
3761   if (Ao)     *Ao     = a->B;
3762   if (colmap) *colmap = a->garray;
3763   PetscFunctionReturn(0);
3764 }
3765 
3766 #undef __FUNCT__
3767 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3768 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3769 {
3770   PetscErrorCode ierr;
3771   PetscInt       i;
3772   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3773 
3774   PetscFunctionBegin;
3775   if (coloring->ctype == IS_COLORING_GLOBAL) {
3776     ISColoringValue *allcolors,*colors;
3777     ISColoring      ocoloring;
3778 
3779     /* set coloring for diagonal portion */
3780     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3781 
3782     /* set coloring for off-diagonal portion */
3783     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3784     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3785     for (i=0; i<a->B->cmap->n; i++) {
3786       colors[i] = allcolors[a->garray[i]];
3787     }
3788     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3789     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3790     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3791     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3792   } else if (coloring->ctype == IS_COLORING_LOCAL) {
3793     ISColoringValue *colors;
3794     PetscInt        *larray;
3795     ISColoring      ocoloring;
3796 
3797     /* set coloring for diagonal portion */
3798     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3799     for (i=0; i<a->A->cmap->n; i++) {
3800       larray[i] = i + A->cmap->rstart;
3801     }
3802     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3803     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3804     for (i=0; i<a->A->cmap->n; i++) {
3805       colors[i] = coloring->colors[larray[i]];
3806     }
3807     ierr = PetscFree(larray);CHKERRQ(ierr);
3808     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3809     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3810     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3811 
3812     /* set coloring for off-diagonal portion */
3813     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3814     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3815     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3816     for (i=0; i<a->B->cmap->n; i++) {
3817       colors[i] = coloring->colors[larray[i]];
3818     }
3819     ierr = PetscFree(larray);CHKERRQ(ierr);
3820     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3821     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3822     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3823   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3824   PetscFunctionReturn(0);
3825 }
3826 
3827 #undef __FUNCT__
3828 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3829 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3830 {
3831   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3836   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3837   PetscFunctionReturn(0);
3838 }
3839 
3840 #undef __FUNCT__
3841 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3842 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3843 {
3844   PetscErrorCode ierr;
3845   PetscInt       m,N,i,rstart,nnz,Ii;
3846   PetscInt       *indx;
3847   PetscScalar    *values;
3848 
3849   PetscFunctionBegin;
3850   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3851   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3852     PetscInt       *dnz,*onz,sum,bs,cbs;
3853 
3854     if (n == PETSC_DECIDE) {
3855       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3856     }
3857     /* Check sum(n) = N */
3858     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3859     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3860 
3861     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3862     rstart -= m;
3863 
3864     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3865     for (i=0; i<m; i++) {
3866       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3867       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3868       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3869     }
3870 
3871     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3872     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3873     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3874     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3875     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3876     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3877     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3878   }
3879 
3880   /* numeric phase */
3881   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3882   for (i=0; i<m; i++) {
3883     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3884     Ii   = i + rstart;
3885     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3886     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3887   }
3888   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3889   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3890   PetscFunctionReturn(0);
3891 }
3892 
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatFileSplit"
3895 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3896 {
3897   PetscErrorCode    ierr;
3898   PetscMPIInt       rank;
3899   PetscInt          m,N,i,rstart,nnz;
3900   size_t            len;
3901   const PetscInt    *indx;
3902   PetscViewer       out;
3903   char              *name;
3904   Mat               B;
3905   const PetscScalar *values;
3906 
3907   PetscFunctionBegin;
3908   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3909   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3910   /* Should this be the type of the diagonal block of A? */
3911   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3912   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3913   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3914   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3915   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3916   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3917   for (i=0; i<m; i++) {
3918     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3919     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3920     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3921   }
3922   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3923   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3924 
3925   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3926   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3927   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3928   sprintf(name,"%s.%d",outfile,rank);
3929   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3930   ierr = PetscFree(name);CHKERRQ(ierr);
3931   ierr = MatView(B,out);CHKERRQ(ierr);
3932   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3933   ierr = MatDestroy(&B);CHKERRQ(ierr);
3934   PetscFunctionReturn(0);
3935 }
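
/*
   Usage sketch (illustrative only; MatFileSplit() is an internal helper): each rank writes its
   local rows of A as a sequential matrix to its own binary file named "<outfile>.<rank>". The
   file name "localrows" below is an arbitrary example.

     ierr = MatFileSplit(A,"localrows");CHKERRQ(ierr);
*/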
3936 
3937 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3938 #undef __FUNCT__
3939 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3940 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3941 {
3942   PetscErrorCode      ierr;
3943   Mat_Merge_SeqsToMPI *merge;
3944   PetscContainer      container;
3945 
3946   PetscFunctionBegin;
3947   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3948   if (container) {
3949     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3950     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3951     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3952     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3953     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3954     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3955     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3956     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3957     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3958     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3959     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3960     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3961     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3962     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3963     ierr = PetscFree(merge);CHKERRQ(ierr);
3964     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3965   }
3966   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3967   PetscFunctionReturn(0);
3968 }
3969 
3970 #include <../src/mat/utils/freespace.h>
3971 #include <petscbt.h>
3972 
3973 #undef __FUNCT__
3974 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3975 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3976 {
3977   PetscErrorCode      ierr;
3978   MPI_Comm            comm;
3979   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3980   PetscMPIInt         size,rank,taga,*len_s;
3981   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3982   PetscInt            proc,m;
3983   PetscInt            **buf_ri,**buf_rj;
3984   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3985   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3986   MPI_Request         *s_waits,*r_waits;
3987   MPI_Status          *status;
3988   MatScalar           *aa=a->a;
3989   MatScalar           **abuf_r,*ba_i;
3990   Mat_Merge_SeqsToMPI *merge;
3991   PetscContainer      container;
3992 
3993   PetscFunctionBegin;
3994   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3995   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3996 
3997   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3998   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3999 
4000   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4001   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4002 
4003   bi     = merge->bi;
4004   bj     = merge->bj;
4005   buf_ri = merge->buf_ri;
4006   buf_rj = merge->buf_rj;
4007 
4008   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4009   owners = merge->rowmap->range;
4010   len_s  = merge->len_s;
4011 
4012   /* send and recv matrix values */
4013   /*-----------------------------*/
4014   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4015   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4016 
4017   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4018   for (proc=0,k=0; proc<size; proc++) {
4019     if (!len_s[proc]) continue;
4020     i    = owners[proc];
4021     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4022     k++;
4023   }
4024 
4025   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4026   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4027   ierr = PetscFree(status);CHKERRQ(ierr);
4028 
4029   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4030   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4031 
4032   /* insert mat values of mpimat */
4033   /*----------------------------*/
4034   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4035   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4036 
4037   for (k=0; k<merge->nrecv; k++) {
4038     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4039     nrows       = *(buf_ri_k[k]);
4040     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4041     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4042   }
4043 
4044   /* set values of ba */
4045   m = merge->rowmap->n;
4046   for (i=0; i<m; i++) {
4047     arow = owners[rank] + i;
4048     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4049     bnzi = bi[i+1] - bi[i];
4050     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4051 
4052     /* add local non-zero vals of this proc's seqmat into ba */
4053     anzi   = ai[arow+1] - ai[arow];
4054     aj     = a->j + ai[arow];
4055     aa     = a->a + ai[arow];
4056     nextaj = 0;
4057     for (j=0; nextaj<anzi; j++) {
4058       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4059         ba_i[j] += aa[nextaj++];
4060       }
4061     }
4062 
4063     /* add received vals into ba */
4064     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4065       /* i-th row */
4066       if (i == *nextrow[k]) {
4067         anzi   = *(nextai[k]+1) - *nextai[k];
4068         aj     = buf_rj[k] + *(nextai[k]);
4069         aa     = abuf_r[k] + *(nextai[k]);
4070         nextaj = 0;
4071         for (j=0; nextaj<anzi; j++) {
4072           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4073             ba_i[j] += aa[nextaj++];
4074           }
4075         }
4076         nextrow[k]++; nextai[k]++;
4077       }
4078     }
4079     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4080   }
4081   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4082   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4083 
4084   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4085   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4086   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4087   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4088   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4089   PetscFunctionReturn(0);
4090 }
4091 
4092 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4093 
4094 #undef __FUNCT__
4095 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4096 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4097 {
4098   PetscErrorCode      ierr;
4099   Mat                 B_mpi;
4100   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4101   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4102   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4103   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4104   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4105   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4106   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4107   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4108   MPI_Status          *status;
4109   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4110   PetscBT             lnkbt;
4111   Mat_Merge_SeqsToMPI *merge;
4112   PetscContainer      container;
4113 
4114   PetscFunctionBegin;
4115   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4116 
4117   /* make sure it is a PETSc comm */
4118   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4119   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4120   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4121 
4122   ierr = PetscNew(&merge);CHKERRQ(ierr);
4123   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4124 
4125   /* determine row ownership */
4126   /*---------------------------------------------------------*/
4127   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4128   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4129   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4130   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4131   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4132   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4133   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4134 
4135   m      = merge->rowmap->n;
4136   owners = merge->rowmap->range;
4137 
4138   /* determine the number of messages to send, their lengths */
4139   /*---------------------------------------------------------*/
4140   len_s = merge->len_s;
4141 
4142   len          = 0; /* length of buf_si[] */
4143   merge->nsend = 0;
4144   for (proc=0; proc<size; proc++) {
4145     len_si[proc] = 0;
4146     if (proc == rank) {
4147       len_s[proc] = 0;
4148     } else {
4149       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4150       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in the rows to be sent to [proc] */
4151     }
4152     if (len_s[proc]) {
4153       merge->nsend++;
4154       nrows = 0;
4155       for (i=owners[proc]; i<owners[proc+1]; i++) {
4156         if (ai[i+1] > ai[i]) nrows++;
4157       }
4158       len_si[proc] = 2*(nrows+1);
4159       len         += len_si[proc];
4160     }
4161   }
4162 
4163   /* determine the number and length of messages to receive for ij-structure */
4164   /*-------------------------------------------------------------------------*/
4165   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4166   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4167 
4168   /* post the Irecv of j-structure */
4169   /*-------------------------------*/
4170   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4171   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4172 
4173   /* post the Isend of j-structure */
4174   /*--------------------------------*/
4175   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4176 
4177   for (proc=0, k=0; proc<size; proc++) {
4178     if (!len_s[proc]) continue;
4179     i    = owners[proc];
4180     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4181     k++;
4182   }
4183 
4184   /* receives and sends of j-structure are complete */
4185   /*------------------------------------------------*/
4186   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4187   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4188 
4189   /* send and recv i-structure */
4190   /*---------------------------*/
4191   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4192   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4193 
4194   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4195   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4196   for (proc=0,k=0; proc<size; proc++) {
4197     if (!len_s[proc]) continue;
4198     /* form outgoing message for i-structure:
4199          buf_si[0]:                 nrows to be sent
4200                [1:nrows]:           row index (global)
4201                [nrows+1:2*nrows+1]: i-structure index
4202     */
4203     /*-------------------------------------------*/
4204     nrows       = len_si[proc]/2 - 1;
4205     buf_si_i    = buf_si + nrows+1;
4206     buf_si[0]   = nrows;
4207     buf_si_i[0] = 0;
4208     nrows       = 0;
4209     for (i=owners[proc]; i<owners[proc+1]; i++) {
4210       anzi = ai[i+1] - ai[i];
4211       if (anzi) {
4212         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4213         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4214         nrows++;
4215       }
4216     }
4217     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4218     k++;
4219     buf_si += len_si[proc];
4220   }
4221 
4222   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4223   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4224 
4225   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4226   for (i=0; i<merge->nrecv; i++) {
4227     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4228   }
4229 
4230   ierr = PetscFree(len_si);CHKERRQ(ierr);
4231   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4232   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4233   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4234   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4235   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4236   ierr = PetscFree(status);CHKERRQ(ierr);
4237 
4238   /* compute a local seq matrix in each processor */
4239   /*----------------------------------------------*/
4240   /* allocate bi array and free space for accumulating nonzero column info */
4241   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4242   bi[0] = 0;
4243 
4244   /* create and initialize a linked list */
4245   nlnk = N+1;
4246   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4247 
4248   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4249   len  = ai[owners[rank+1]] - ai[owners[rank]];
4250   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4251 
4252   current_space = free_space;
4253 
4254   /* determine symbolic info for each local row */
4255   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4256 
4257   for (k=0; k<merge->nrecv; k++) {
4258     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4259     nrows       = *buf_ri_k[k];
4260     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4261     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4262   }
4263 
4264   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4265   len  = 0;
4266   for (i=0; i<m; i++) {
4267     bnzi = 0;
4268     /* add local non-zero cols of this proc's seqmat into lnk */
4269     arow  = owners[rank] + i;
4270     anzi  = ai[arow+1] - ai[arow];
4271     aj    = a->j + ai[arow];
4272     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4273     bnzi += nlnk;
4274     /* add received col data into lnk */
4275     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4276       if (i == *nextrow[k]) { /* i-th row */
4277         anzi  = *(nextai[k]+1) - *nextai[k];
4278         aj    = buf_rj[k] + *nextai[k];
4279         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4280         bnzi += nlnk;
4281         nextrow[k]++; nextai[k]++;
4282       }
4283     }
4284     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4285 
4286     /* if free space is not available, make more free space */
4287     if (current_space->local_remaining<bnzi) {
4288       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4289       nspacedouble++;
4290     }
4291     /* copy data into free space, then initialize lnk */
4292     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4293     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4294 
4295     current_space->array           += bnzi;
4296     current_space->local_used      += bnzi;
4297     current_space->local_remaining -= bnzi;
4298 
4299     bi[i+1] = bi[i] + bnzi;
4300   }
4301 
4302   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4303 
4304   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4305   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4306   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4307 
4308   /* create symbolic parallel matrix B_mpi */
4309   /*---------------------------------------*/
4310   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4311   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4312   if (n==PETSC_DECIDE) {
4313     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4314   } else {
4315     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4316   }
4317   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4318   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4319   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4320   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4321   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4322 
4323   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4324   B_mpi->assembled    = PETSC_FALSE;
4325   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4326   merge->bi           = bi;
4327   merge->bj           = bj;
4328   merge->buf_ri       = buf_ri;
4329   merge->buf_rj       = buf_rj;
4330   merge->coi          = NULL;
4331   merge->coj          = NULL;
4332   merge->owners_co    = NULL;
4333 
4334   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4335 
4336   /* attach the supporting struct to B_mpi for reuse */
4337   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4338   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4339   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4340   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4341   *mpimat = B_mpi;
4342 
4343   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4344   PetscFunctionReturn(0);
4345 }
4346 
4347 #undef __FUNCT__
4348 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4349 /*@C
4350       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4351                  matrices from each processor
4352 
4353     Collective on MPI_Comm
4354 
4355    Input Parameters:
4356 +    comm - the communicator the parallel matrix will live on
4357 .    seqmat - the input sequential matrix on each process
4358 .    m - number of local rows (or PETSC_DECIDE)
4359 .    n - number of local columns (or PETSC_DECIDE)
4360 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4361 
4362    Output Parameter:
4363 .    mpimat - the parallel matrix generated
4364 
4365     Level: advanced
4366 
4367    Notes:
4368      The dimensions of the sequential matrix in each processor MUST be the same.
4369      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4370      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4371 @*/
4372 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4373 {
4374   PetscErrorCode ierr;
4375   PetscMPIInt    size;
4376 
4377   PetscFunctionBegin;
4378   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4379   if (size == 1) {
4380     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4381     if (scall == MAT_INITIAL_MATRIX) {
4382       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4383     } else {
4384       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4385     }
4386     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4387     PetscFunctionReturn(0);
4388   }
4389   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4390   if (scall == MAT_INITIAL_MATRIX) {
4391     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4392   }
4393   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4394   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4395   PetscFunctionReturn(0);
4396 }
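
/*
   Usage sketch (illustrative only): sum the sequential matrix held by each rank into one
   parallel matrix, then reuse the parallel matrix after the numerical values of seqmat change
   while its nonzero pattern stays fixed. seqmat is assumed to be an assembled MATSEQAIJ matrix
   of the same global size on every rank.

     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... update the values of seqmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/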
4397 
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4400 /*@
4401      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4402           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4403           with MatGetSize()
4404 
4405     Not Collective
4406 
4407    Input Parameters:
4408 +    A - the matrix
4409 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4410 
4411    Output Parameter:
4412 .    A_loc - the local sequential matrix generated
4413 
4414     Level: developer
4415 
4416 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4417 
4418 @*/
4419 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4420 {
4421   PetscErrorCode ierr;
4422   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4423   Mat_SeqAIJ     *mat,*a,*b;
4424   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4425   MatScalar      *aa,*ba,*cam;
4426   PetscScalar    *ca;
4427   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4428   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4429   PetscBool      match;
4430   MPI_Comm       comm;
4431   PetscMPIInt    size;
4432 
4433   PetscFunctionBegin;
4434   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4435   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4436   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4437   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4438   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4439 
4440   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4441   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4442   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4443   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4444   aa = a->a; ba = b->a;
4445   if (scall == MAT_INITIAL_MATRIX) {
4446     if (size == 1) {
4447       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
           ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); /* balance the event begun above before the early return */
4448       PetscFunctionReturn(0);
4449     }
4450 
4451     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4452     ci[0] = 0;
4453     for (i=0; i<am; i++) {
4454       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4455     }
4456     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4457     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4458     k    = 0;
4459     for (i=0; i<am; i++) {
4460       ncols_o = bi[i+1] - bi[i];
4461       ncols_d = ai[i+1] - ai[i];
4462       /* off-diagonal portion of A */
4463       for (jo=0; jo<ncols_o; jo++) {
4464         col = cmap[*bj];
4465         if (col >= cstart) break;
4466         cj[k]   = col; bj++;
4467         ca[k++] = *ba++;
4468       }
4469       /* diagonal portion of A */
4470       for (j=0; j<ncols_d; j++) {
4471         cj[k]   = cstart + *aj++;
4472         ca[k++] = *aa++;
4473       }
4474       /* off-diagonal portion of A */
4475       for (j=jo; j<ncols_o; j++) {
4476         cj[k]   = cmap[*bj++];
4477         ca[k++] = *ba++;
4478       }
4479     }
4480     /* put together the new matrix */
4481     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4482     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4483     /* Since these are PETSc arrays, change flags to free them as necessary. */
4484     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4485     mat->free_a  = PETSC_TRUE;
4486     mat->free_ij = PETSC_TRUE;
4487     mat->nonew   = 0;
4488   } else if (scall == MAT_REUSE_MATRIX) {
4489     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4490     ci = mat->i; cj = mat->j; cam = mat->a;
4491     for (i=0; i<am; i++) {
4492       /* off-diagonal portion of A */
4493       ncols_o = bi[i+1] - bi[i];
4494       for (jo=0; jo<ncols_o; jo++) {
4495         col = cmap[*bj];
4496         if (col >= cstart) break;
4497         *cam++ = *ba++; bj++;
4498       }
4499       /* diagonal portion of A */
4500       ncols_d = ai[i+1] - ai[i];
4501       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4502       /* off-diagonal portion of A */
4503       for (j=jo; j<ncols_o; j++) {
4504         *cam++ = *ba++; bj++;
4505       }
4506     }
4507   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4508   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
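
/*
   Usage sketch (illustrative only): gather the local rows of a parallel AIJ matrix A into a
   sequential matrix, refresh it after the values of A change, and destroy it when done.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc with a sequential algorithm on each rank ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/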
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4514 /*@C
4515      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4516 
4517     Not Collective
4518 
4519    Input Parameters:
4520 +    A - the matrix
4521 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4522 -    row, col - index sets of rows and columns to extract (or NULL)
4523 
4524    Output Parameter:
4525 .    A_loc - the local sequential matrix generated
4526 
4527     Level: developer
4528 
4529 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4530 
4531 @*/
4532 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4533 {
4534   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4535   PetscErrorCode ierr;
4536   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4537   IS             isrowa,iscola;
4538   Mat            *aloc;
4539   PetscBool      match;
4540 
4541   PetscFunctionBegin;
4542   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4543   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4544   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4545   if (!row) {
4546     start = A->rmap->rstart; end = A->rmap->rend;
4547     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4548   } else {
4549     isrowa = *row;
4550   }
4551   if (!col) {
4552     start = A->cmap->rstart;
4553     cmap  = a->garray;
4554     nzA   = a->A->cmap->n;
4555     nzB   = a->B->cmap->n;
4556     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4557     ncols = 0;
4558     for (i=0; i<nzB; i++) {
4559       if (cmap[i] < start) idx[ncols++] = cmap[i];
4560       else break;
4561     }
4562     imark = i;
4563     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4564     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4565     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4566   } else {
4567     iscola = *col;
4568   }
4569   if (scall != MAT_INITIAL_MATRIX) {
4570     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4571     aloc[0] = *A_loc;
4572   }
4573   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4574   *A_loc = aloc[0];
4575   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4576   if (!row) {
4577     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4578   }
4579   if (!col) {
4580     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4581   }
4582   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4583   PetscFunctionReturn(0);
4584 }
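
/*
   Usage sketch (illustrative only): extract the local rows of A restricted to its nonzero
   columns. Passing NULL for row and col lets the routine build the needed index sets itself.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/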
4585 
4586 #undef __FUNCT__
4587 #define __FUNCT__ "MatGetBrowsOfAcols"
4588 /*@C
4589     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4590 
4591     Collective on Mat
4592 
4593    Input Parameters:
4594 +    A,B - the matrices in mpiaij format
4595 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4596 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4597 
4598    Output Parameters:
4599 +    rowb, colb - index sets of rows and columns of B to extract
4600 -    B_seq - the sequential matrix generated
4601 
4602     Level: developer
4603 
4604 @*/
4605 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4606 {
4607   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4608   PetscErrorCode ierr;
4609   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4610   IS             isrowb,iscolb;
4611   Mat            *bseq=NULL;
4612 
4613   PetscFunctionBegin;
4614   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4615     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4616   }
4617   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4618 
4619   if (scall == MAT_INITIAL_MATRIX) {
4620     start = A->cmap->rstart;
4621     cmap  = a->garray;
4622     nzA   = a->A->cmap->n;
4623     nzB   = a->B->cmap->n;
4624     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4625     ncols = 0;
4626     for (i=0; i<nzB; i++) {  /* row < local row index */
4627       if (cmap[i] < start) idx[ncols++] = cmap[i];
4628       else break;
4629     }
4630     imark = i;
4631     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4632     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4633     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4634     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4635   } else {
4636     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4637     isrowb  = *rowb; iscolb = *colb;
4638     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4639     bseq[0] = *B_seq;
4640   }
4641   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4642   *B_seq = bseq[0];
4643   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4644   if (!rowb) {
4645     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4646   } else {
4647     *rowb = isrowb;
4648   }
4649   if (!colb) {
4650     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4651   } else {
4652     *colb = iscolb;
4653   }
4654   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4655   PetscFunctionReturn(0);
4656 }
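
/*
   Usage sketch (illustrative only): gather the rows of B that correspond to the nonzero
   columns of the local part of A, keeping the index sets so a later call can reuse the
   result after the values of B change but not its nonzero pattern.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... update the values of B ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/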
4657 
4658 #undef __FUNCT__
4659 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4660 /*
4661     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4662     of the OFF-DIAGONAL portion of the local part of A
4663 
4664     Collective on Mat
4665 
4666    Input Parameters:
4667 +    A,B - the matrices in mpiaij format
4668 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4669 
4670    Output Parameters:
4671 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4672 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4673 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4674 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4675 
4676     Level: developer
4677 
4678 */
4679 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4680 {
4681   VecScatter_MPI_General *gen_to,*gen_from;
4682   PetscErrorCode         ierr;
4683   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4684   Mat_SeqAIJ             *b_oth;
4685   VecScatter             ctx =a->Mvctx;
4686   MPI_Comm               comm;
4687   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4688   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4689   PetscScalar            *rvalues,*svalues;
4690   MatScalar              *b_otha,*bufa,*bufA;
4691   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4692   MPI_Request            *rwaits = NULL,*swaits = NULL;
4693   MPI_Status             *sstatus,rstatus;
4694   PetscMPIInt            jj,size;
4695   PetscInt               *cols,sbs,rbs;
4696   PetscScalar            *vals;
4697 
4698   PetscFunctionBegin;
4699   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4700   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4701 
4702   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4703     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4704   }
4705   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4706   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4707 
4708   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4709   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4710   rvalues  = gen_from->values; /* holds the length of receiving row */
4711   svalues  = gen_to->values;   /* holds the length of sending row */
4712   nrecvs   = gen_from->n;
4713   nsends   = gen_to->n;
4714 
4715   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4716   srow    = gen_to->indices;    /* local row index to be sent */
4717   sstarts = gen_to->starts;
4718   sprocs  = gen_to->procs;
4719   sstatus = gen_to->sstatus;
4720   sbs     = gen_to->bs;
4721   rstarts = gen_from->starts;
4722   rprocs  = gen_from->procs;
4723   rbs     = gen_from->bs;
4724 
4725   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4726   if (scall == MAT_INITIAL_MATRIX) {
4727     /* i-array */
4728     /*---------*/
4729     /*  post receives */
4730     for (i=0; i<nrecvs; i++) {
4731       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4732       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4733       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4734     }
4735 
4736     /* pack the outgoing message */
4737     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4738 
4739     sstartsj[0] = 0;
4740     rstartsj[0] = 0;
4741     len         = 0; /* total length of j or a array to be sent */
4742     k           = 0;
4743     for (i=0; i<nsends; i++) {
4744       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4745       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4746       for (j=0; j<nrows; j++) {
4747         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4748         for (l=0; l<sbs; l++) {
4749           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4750 
4751           rowlen[j*sbs+l] = ncols;
4752 
4753           len += ncols;
4754           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4755         }
4756         k++;
4757       }
4758       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4759 
4760       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4761     }
4762     /* recvs and sends of i-array are completed */
4763     i = nrecvs;
4764     while (i--) {
4765       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4766     }
4767     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4768 
4769     /* allocate buffers for sending j and a arrays */
4770     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4771     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4772 
4773     /* create i-array of B_oth */
4774     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4775 
4776     b_othi[0] = 0;
4777     len       = 0; /* total length of j or a array to be received */
4778     k         = 0;
4779     for (i=0; i<nrecvs; i++) {
4780       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4781       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4782       for (j=0; j<nrows; j++) {
4783         b_othi[k+1] = b_othi[k] + rowlen[j];
4784         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4785         k++;
4786       }
4787       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4788     }
4789 
4790     /* allocate space for j and a arrays of B_oth */
4791     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4792     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4793 
4794     /* j-array */
4795     /*---------*/
4796     /*  post receives of j-array */
4797     for (i=0; i<nrecvs; i++) {
4798       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4799       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4800     }
4801 
4802     /* pack the outgoing message j-array */
4803     k = 0;
4804     for (i=0; i<nsends; i++) {
4805       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4806       bufJ  = bufj+sstartsj[i];
4807       for (j=0; j<nrows; j++) {
4808         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4809         for (ll=0; ll<sbs; ll++) {
4810           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4811           for (l=0; l<ncols; l++) {
4812             *bufJ++ = cols[l];
4813           }
4814           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4815         }
4816       }
4817       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4818     }
4819 
4820     /* recvs and sends of j-array are completed */
4821     i = nrecvs;
4822     while (i--) {
4823       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4824     }
4825     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4826   } else if (scall == MAT_REUSE_MATRIX) {
4827     sstartsj = *startsj_s;
4828     rstartsj = *startsj_r;
4829     bufa     = *bufa_ptr;
4830     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4831     b_otha   = b_oth->a;
4832   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4833 
4834   /* a-array */
4835   /*---------*/
4836   /*  post receives of a-array */
4837   for (i=0; i<nrecvs; i++) {
4838     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4839     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4840   }
4841 
4842   /* pack the outgoing message a-array */
4843   k = 0;
4844   for (i=0; i<nsends; i++) {
4845     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4846     bufA  = bufa+sstartsj[i];
4847     for (j=0; j<nrows; j++) {
4848       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4849       for (ll=0; ll<sbs; ll++) {
4850         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4851         for (l=0; l<ncols; l++) {
4852           *bufA++ = vals[l];
4853         }
4854         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4855       }
4856     }
4857     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4858   }
4859   /* recvs and sends of a-array are completed */
4860   i = nrecvs;
4861   while (i--) {
4862     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4863   }
4864   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4865   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4866 
4867   if (scall == MAT_INITIAL_MATRIX) {
4868     /* put together the new matrix */
4869     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4870 
4871     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4872     /* Since these are PETSc arrays, change flags to free them as necessary. */
4873     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4874     b_oth->free_a  = PETSC_TRUE;
4875     b_oth->free_ij = PETSC_TRUE;
4876     b_oth->nonew   = 0;
4877 
4878     ierr = PetscFree(bufj);CHKERRQ(ierr);
4879     if (!startsj_s || !bufa_ptr) {
4880       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4881       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller in this case */
4882     } else {
4883       *startsj_s = sstartsj;
4884       *startsj_r = rstartsj;
4885       *bufa_ptr  = bufa;
4886     }
4887   }
4888   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4889   PetscFunctionReturn(0);
4890 }
4891 
4892 #undef __FUNCT__
4893 #define __FUNCT__ "MatGetCommunicationStructs"
4894 /*@C
4895   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4896 
4897   Not Collective
4898 
4899   Input Parameter:
4900 . A - The matrix in mpiaij format
4901 
4902   Output Parameters:
4903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4904 . colmap - A map from global column index to local index into lvec
4905 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4906 
4907   Level: developer
4908 
4909 @*/
4910 #if defined(PETSC_USE_CTABLE)
4911 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4912 #else
4913 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4914 #endif
4915 {
4916   Mat_MPIAIJ *a;
4917 
4918   PetscFunctionBegin;
4919   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4920   PetscValidPointer(lvec, 2);
4921   PetscValidPointer(colmap, 3);
4922   PetscValidPointer(multScatter, 4);
4923   a = (Mat_MPIAIJ*) A->data;
4924   if (lvec) *lvec = a->lvec;
4925   if (colmap) *colmap = a->colmap;
4926   if (multScatter) *multScatter = a->Mvctx;
4927   PetscFunctionReturn(0);
4928 }
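
/*
   Usage sketch (illustrative only): query the communication structures of an MPIAIJ matrix A.
   The routine validates all three output pointers, so each must be a valid address; the type
   of colmap depends on whether PETSc was configured with ctable support.

     Vec        lvec;
     VecScatter sct;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
*/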
4929 
4930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4933 #if defined(PETSC_HAVE_ELEMENTAL)
4934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4935 #endif
4936 
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4939 /*
4940     Computes (B'*A')' since computing B*A directly is untenable
4941 
4942                n                       p                          p
4943         (              )       (              )         (                  )
4944       m (      A       )  *  n (       B      )   =   m (         C        )
4945         (              )       (              )         (                  )
4946 
4947 */
4948 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4949 {
4950   PetscErrorCode ierr;
4951   Mat            At,Bt,Ct;
4952 
4953   PetscFunctionBegin;
4954   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4955   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4956   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4957   ierr = MatDestroy(&At);CHKERRQ(ierr);
4958   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4959   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4960   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4961   PetscFunctionReturn(0);
4962 }
4963 
4964 #undef __FUNCT__
4965 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4966 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4967 {
4968   PetscErrorCode ierr;
4969   PetscInt       m=A->rmap->n,n=B->cmap->n;
4970   Mat            Cmat;
4971 
4972   PetscFunctionBegin;
4973   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4974   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4975   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4976   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4977   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4978   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4979   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4980   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4981 
4982   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4983 
4984   *C = Cmat;
4985   PetscFunctionReturn(0);
4986 }
4987 
4988 /* ----------------------------------------------------------------*/
4989 #undef __FUNCT__
4990 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4991 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4992 {
4993   PetscErrorCode ierr;
4994 
4995   PetscFunctionBegin;
4996   if (scall == MAT_INITIAL_MATRIX) {
4997     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4998     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4999     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5000   }
5001   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5002   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5003   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5004   PetscFunctionReturn(0);
5005 }
5006 
5007 /*MC
5008    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5009 
5010    Options Database Keys:
5011 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5012 
5013   Level: beginner
5014 
5015 .seealso: MatCreateAIJ()
5016 M*/
5017 
5018 #undef __FUNCT__
5019 #define __FUNCT__ "MatCreate_MPIAIJ"
5020 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5021 {
5022   Mat_MPIAIJ     *b;
5023   PetscErrorCode ierr;
5024   PetscMPIInt    size;
5025 
5026   PetscFunctionBegin;
5027   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5028 
5029   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5030   B->data       = (void*)b;
5031   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5032   B->assembled  = PETSC_FALSE;
5033   B->insertmode = NOT_SET_VALUES;
5034   b->size       = size;
5035 
5036   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5037 
5038   /* build cache for off array entries formed */
5039   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5040 
5041   b->donotstash  = PETSC_FALSE;
5042   b->colmap      = 0;
5043   b->garray      = 0;
5044   b->roworiented = PETSC_TRUE;
5045 
5046   /* stuff used for matrix vector multiply */
5047   b->lvec  = NULL;
5048   b->Mvctx = NULL;
5049 
5050   /* stuff for MatGetRow() */
5051   b->rowindices   = 0;
5052   b->rowvalues    = 0;
5053   b->getrowactive = PETSC_FALSE;
5054 
5055   /* flexible pointer used in CUSP/CUSPARSE classes */
5056   b->spptr = NULL;
5057 
5058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5068 #if defined(PETSC_HAVE_ELEMENTAL)
5069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5070 #endif
5071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5072   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5074   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5075   PetscFunctionReturn(0);
5076 }
5077 
5078 #undef __FUNCT__
5079 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5080 /*@C
5081      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5082          and "off-diagonal" part of the matrix in CSR format.
5083 
5084    Collective on MPI_Comm
5085 
5086    Input Parameters:
5087 +  comm - MPI communicator
5088 .  m - number of local rows (Cannot be PETSC_DECIDE)
5089 .  n - This value should be the same as the local size used in creating the
5090        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5091        calculated if N is given). For square matrices n is almost always m.
5092 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5093 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5094 .   i - row indices for "diagonal" portion of matrix
5095 .   j - column indices
5096 .   a - matrix values
5097 .   oi - row indices for "off-diagonal" portion of matrix
5098 .   oj - column indices
5099 -   oa - matrix values
5100 
5101    Output Parameter:
5102 .   mat - the matrix
5103 
5104    Level: advanced
5105 
5106    Notes:
5107        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5108        must free the arrays once the matrix has been destroyed and not before.
5109 
5110        The i and j indices are 0 based
5111 
5112        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5113 
5114        This sets local rows and cannot be used to set off-processor values.
5115 
5116        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5117        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5118        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5119        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5120        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5121        communication if it is known that only local entries will be set.
5122 
5123 .keywords: matrix, aij, compressed row, sparse, parallel
5124 
5125 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5126           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5127 @*/
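/*
   An illustrative usage sketch (not part of the manual page above). The array names and the toy layout
   of a 4x4 matrix with 2 rows and 2 columns per process are assumptions made only for this example;
   rank 1 would supply its own split CSR data analogously. Note that the "diagonal" block uses local
   column indices, while the "off-diagonal" block uses global column indices (its CSR storage is created
   with N columns in the code below).

      PetscInt       di[3] = {0,1,2}, dj[2] = {0,1};      one entry per local row in the diagonal block (local columns)
      PetscScalar    da[2] = {4.0,4.0};
      PetscInt       oi[3] = {0,1,2}, oj[2] = {2,3};      one entry per local row in the off-diagonal block (global columns)
      PetscScalar    oa[2] = {-1.0,-1.0};
      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                            di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
      (use A, for example with MatMult(), then destroy it)
      ierr = MatDestroy(&A);CHKERRQ(ierr);
      (only now may di,dj,da,oi,oj,oa be freed or go out of scope, since they were not copied)
*/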
PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Build the local "diagonal" (A) and "off-diagonal" (B) blocks directly on top of the user-provided arrays */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
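
/*
   With the redefinitions above, an error detected inside the void Fortran stub below aborts instead of
   attempting to return an error code the function does not have.  As a sketch (not the verbatim
   preprocessor output), a call such as

      ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);

   now behaves like

      ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRABORT(PETSC_COMM_WORLD,ierr);
*/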

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
