xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fa2f3a98544cffa8147e57031dd6780dd55215d2)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15    for communicators controlling multiple processes.  It is recommended that you call both of
16    the above preallocation routines for simplicity; a usage sketch follows this manual page.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the AIJ type also automatically
23     switches over to use inodes when enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
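/*
   Example usage (a hedged sketch, not part of this source file): creating an AIJ matrix that
   works on both one and many MPI processes by calling both preallocation routines.  The sizes
   m, n, M, N and the per-row nonzero counts 5 and 2 are placeholder assumptions.

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);          used when the communicator has a single process
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   used when the communicator has multiple processes

   Whichever call does not match the communicator size is ignored, so both may always be made.
*/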
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
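/*
   A hedged sketch (not part of this source file): the CRL variant is usually selected at run
   time rather than hard-coded; a code that calls

     MatSetFromOptions(A);

   before preallocation and assembly can be switched to this format with the command line option
   -mat_type aijcrl.  A is a placeholder name.
*/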
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125 
126   PetscFunctionBegin;
127   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
128     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
129   } else {
130     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
138   PetscErrorCode ierr;
139   PetscInt       i,rstart,nrows,*rows;
140 
141   PetscFunctionBegin;
142   *zrows = NULL;
143   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
144   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
145   for (i=0; i<nrows; i++) rows[i] += rstart;
146   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
151 {
152   PetscErrorCode ierr;
153   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
154   PetscInt       i,n,*garray = aij->garray;
155   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
156   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
157   PetscReal      *work;
158 
159   PetscFunctionBegin;
160   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
161   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
162   if (type == NORM_2) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
168     }
169   } else if (type == NORM_1) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
175     }
176   } else if (type == NORM_INFINITY) {
177     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
178       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
179     }
180     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
181       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
182     }
183 
184   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
185   if (type == NORM_INFINITY) {
186     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
187   } else {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   }
190   ierr = PetscFree(work);CHKERRQ(ierr);
191   if (type == NORM_2) {
192     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
193   }
194   PetscFunctionReturn(0);
195 }
196 
197 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
198 {
199   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
200   IS              sis,gis;
201   PetscErrorCode  ierr;
202   const PetscInt  *isis,*igis;
203   PetscInt        n,*iis,nsis,ngis,rstart,i;
204 
205   PetscFunctionBegin;
206   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
207   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
208   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
209   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
210   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
211   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
212 
213   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
215   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
216   n    = ngis + nsis;
217   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
218   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
219   for (i=0; i<n; i++) iis[i] += rstart;
220   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
221 
222   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
223   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
224   ierr = ISDestroy(&sis);CHKERRQ(ierr);
225   ierr = ISDestroy(&gis);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 /*
230     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
231     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
232 
233     Only for square matrices
234 
235     Used by a preconditioner, hence PETSC_EXTERN
236 */
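/*
   A hedged usage sketch (not part of the PETSc source): with the full sequential matrix gmat
   held on process 0 and m the number of rows each process is to own, a preconditioner could call

     Mat dmat;
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);

   and later, once only the numerical values of gmat have changed (same nonzero pattern),

     MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);

   to move the new values over.  comm, gmat, m and dmat are placeholder names.
*/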
237 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
238 {
239   PetscMPIInt    rank,size;
240   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
241   PetscErrorCode ierr;
242   Mat            mat;
243   Mat_SeqAIJ     *gmata;
244   PetscMPIInt    tag;
245   MPI_Status     status;
246   PetscBool      aij;
247   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
248 
249   PetscFunctionBegin;
250   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
252   if (!rank) {
253     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
254     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
255   }
256   if (reuse == MAT_INITIAL_MATRIX) {
257     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
258     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
259     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
260     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
261     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
262     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
263     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
264     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
265     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
266 
267     rowners[0] = 0;
268     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
269     rstart = rowners[rank];
270     rend   = rowners[rank+1];
271     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
272     if (!rank) {
273       gmata = (Mat_SeqAIJ*) gmat->data;
274       /* send row lengths to all processors */
275       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
276       for (i=1; i<size; i++) {
277         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
278       }
279       /* determine the number of diagonal and off-diagonal entries in each row */
280       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
281       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
282       jj   = 0;
283       for (i=0; i<m; i++) {
284         for (j=0; j<dlens[i]; j++) {
285           if (gmata->j[jj] < rstart) ld[i]++;
286           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
287           jj++;
288         }
289       }
290       /* send column indices to other processes */
291       for (i=1; i<size; i++) {
292         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
293         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
295       }
296 
297       /* send numerical values to other processes */
298       for (i=1; i<size; i++) {
299         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
300         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
301       }
302       gmataa = gmata->a;
303       gmataj = gmata->j;
304 
305     } else {
306       /* receive row lengths */
307       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       /* receive column indices */
309       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
311       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       /* determine the number of diagonal and off-diagonal entries in each row */
313       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
314       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
315       jj   = 0;
316       for (i=0; i<m; i++) {
317         for (j=0; j<dlens[i]; j++) {
318           if (gmataj[jj] < rstart) ld[i]++;
319           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
320           jj++;
321         }
322       }
323       /* receive numerical values */
324       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
325       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
326     }
327     /* set preallocation */
328     for (i=0; i<m; i++) {
329       dlens[i] -= olens[i];
330     }
331     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
332     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
333 
334     for (i=0; i<m; i++) {
335       dlens[i] += olens[i];
336     }
337     cnt = 0;
338     for (i=0; i<m; i++) {
339       row  = rstart + i;
340       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
341       cnt += dlens[i];
342     }
343     if (rank) {
344       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
345     }
346     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
347     ierr = PetscFree(rowners);CHKERRQ(ierr);
348 
349     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
350 
351     *inmat = mat;
352   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
353     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
354     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
355     mat  = *inmat;
356     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
357     if (!rank) {
358       /* send numerical values to other processes */
359       gmata  = (Mat_SeqAIJ*) gmat->data;
360       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
361       gmataa = gmata->a;
362       for (i=1; i<size; i++) {
363         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
364         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
365       }
366       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
367     } else {
368       /* receive numerical values from process 0 */
369       nz   = Ad->nz + Ao->nz;
370       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
371       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
372     }
373     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
374     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
375     ad = Ad->a;
376     ao = Ao->a;
377     if (mat->rmap->n) {
378       i  = 0;
379       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
380       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
381     }
382     for (i=1; i<mat->rmap->n; i++) {
383       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
384       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
385     }
386     i--;
387     if (mat->rmap->n) {
388       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
389     }
390     if (rank) {
391       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
392     }
393   }
394   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
396   PetscFunctionReturn(0);
397 }
398 
399 /*
400   Local utility routine that creates a mapping from the global column
401   number to the local number in the off-diagonal part of the local
402   storage of the matrix.  When PETSC_USE_CTABLE is defined this mapping is
403   scalable, at a slightly higher hash-table lookup cost; without it, it is not
404   scalable (each process stores an order-N integer array) but access is fast.
405 */
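/*
   For reference, the lookup that consumers of this map perform (mirroring MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() below) is, in sketch form with placeholder names gcol and lcol:

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;     hash-table variant
     #else
       lcol = aij->colmap[gcol] - 1;                         dense-array variant
     #endif

   where gcol is a global column index and lcol is the resulting local column index into the
   off-diagonal block B (negative if that global column is not present locally).
*/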
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
463   { \
464     if (col <= lastcol2) low2 = 0;                        \
465     else high2 = nrow2;                                   \
466     lastcol2 = col;                                       \
467     while (high2-low2 > 5) {                              \
468       t = (low2+high2)/2;                                 \
469       if (rp2[t] > col) high2 = t;                        \
470       else             low2  = t;                         \
471     }                                                     \
472     for (_i=low2; _i<high2; _i++) {                       \
473       if (rp2[_i] > col) break;                           \
474       if (rp2[_i] == col) {                               \
475         if (addv == ADD_VALUES) ap2[_i] += value;         \
476         else                    ap2[_i] = value;          \
477         goto b_noinsert;                                  \
478       }                                                   \
479     }                                                     \
480     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
481     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
482     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
483     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
484     N = nrow2++ - 1; b->nz++; high2++;                    \
485     /* shift up all the later entries in this row */      \
486     for (ii=N; ii>=_i; ii--) {                            \
487       rp2[ii+1] = rp2[ii];                                \
488       ap2[ii+1] = ap2[ii];                                \
489     }                                                     \
490     rp2[_i] = col;                                        \
491     ap2[_i] = value;                                      \
492     B->nonzerostate++;                                    \
493     b_noinsert: ;                                         \
494     bilen[row] = nrow2;                                   \
495   }
496 
497 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
498 {
499   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
500   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
501   PetscErrorCode ierr;
502   PetscInt       l,*garray = mat->garray,diag;
503 
504   PetscFunctionBegin;
505   /* code only works for square matrices A */
506 
507   /* find size of row to the left of the diagonal part */
508   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
509   row  = row - diag;
510   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
511     if (garray[b->j[b->i[row]+l]] > diag) break;
512   }
513   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
514 
515   /* diagonal part */
516   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* right of diagonal part */
519   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
524 {
525   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
526   PetscScalar    value;
527   PetscErrorCode ierr;
528   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
529   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
530   PetscBool      roworiented = aij->roworiented;
531 
532   /* Some Variables required in the macro */
533   Mat        A                 = aij->A;
534   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
535   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
536   MatScalar  *aa               = a->a;
537   PetscBool  ignorezeroentries = a->ignorezeroentries;
538   Mat        B                 = aij->B;
539   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
540   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
541   MatScalar  *ba               = b->a;
542 
543   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
544   PetscInt  nonew;
545   MatScalar *ap1,*ap2;
546 
547   PetscFunctionBegin;
548   for (i=0; i<m; i++) {
549     if (im[i] < 0) continue;
550 #if defined(PETSC_USE_DEBUG)
551     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
552 #endif
553     if (im[i] >= rstart && im[i] < rend) {
554       row      = im[i] - rstart;
555       lastcol1 = -1;
556       rp1      = aj + ai[row];
557       ap1      = aa + ai[row];
558       rmax1    = aimax[row];
559       nrow1    = ailen[row];
560       low1     = 0;
561       high1    = nrow1;
562       lastcol2 = -1;
563       rp2      = bj + bi[row];
564       ap2      = ba + bi[row];
565       rmax2    = bimax[row];
566       nrow2    = bilen[row];
567       low2     = 0;
568       high2    = nrow2;
569 
570       for (j=0; j<n; j++) {
571         if (roworiented) value = v[i*n+j];
572         else             value = v[i+j*m];
573         if (in[j] >= cstart && in[j] < cend) {
574           col   = in[j] - cstart;
575           nonew = a->nonew;
576           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
577           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
578         } else if (in[j] < 0) continue;
579 #if defined(PETSC_USE_DEBUG)
580         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
581 #endif
582         else {
583           if (mat->was_assembled) {
584             if (!aij->colmap) {
585               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
586             }
587 #if defined(PETSC_USE_CTABLE)
588             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
589             col--;
590 #else
591             col = aij->colmap[in[j]] - 1;
592 #endif
593             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
594               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
595               col  =  in[j];
596               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
597               B     = aij->B;
598               b     = (Mat_SeqAIJ*)B->data;
599               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
600               rp2   = bj + bi[row];
601               ap2   = ba + bi[row];
602               rmax2 = bimax[row];
603               nrow2 = bilen[row];
604               low2  = 0;
605               high2 = nrow2;
606               bm    = aij->B->rmap->n;
607               ba    = b->a;
608             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
609           } else col = in[j];
610           nonew = b->nonew;
611           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
612         }
613       }
614     } else {
615       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
616       if (!aij->donotstash) {
617         mat->assembled = PETSC_FALSE;
618         if (roworiented) {
619           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
620         } else {
621           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
622         }
623       }
624     }
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscErrorCode ierr;
633   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
634   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
635 
636   PetscFunctionBegin;
637   for (i=0; i<m; i++) {
638     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
639     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
640     if (idxm[i] >= rstart && idxm[i] < rend) {
641       row = idxm[i] - rstart;
642       for (j=0; j<n; j++) {
643         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
644         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
645         if (idxn[j] >= cstart && idxn[j] < cend) {
646           col  = idxn[j] - cstart;
647           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
648         } else {
649           if (!aij->colmap) {
650             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
651           }
652 #if defined(PETSC_USE_CTABLE)
653           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
654           col--;
655 #else
656           col = aij->colmap[idxn[j]] - 1;
657 #endif
658           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
659           else {
660             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
670 
671 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
672 {
673   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
674   PetscErrorCode ierr;
675   PetscInt       nstash,reallocs;
676 
677   PetscFunctionBegin;
678   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
679 
680   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
681   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
682   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
687 {
688   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
689   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
690   PetscErrorCode ierr;
691   PetscMPIInt    n;
692   PetscInt       i,j,rstart,ncols,flg;
693   PetscInt       *row,*col;
694   PetscBool      other_disassembled;
695   PetscScalar    *val;
696 
697   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
698 
699   PetscFunctionBegin;
700   if (!aij->donotstash && !mat->nooffprocentries) {
701     while (1) {
702       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
703       if (!flg) break;
704 
705       for (i=0; i<n; ) {
706         /* Now identify the consecutive vals belonging to the same row */
707         for (j=i,rstart=row[j]; j<n; j++) {
708           if (row[j] != rstart) break;
709         }
710         if (j < n) ncols = j-i;
711         else       ncols = n-i;
712         /* Now assemble all these values with a single function call */
713         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
714 
715         i = j;
716       }
717     }
718     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
719   }
720   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
721   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
722 
723   /* determine if any processor has disassembled; if so we must
724      also disassemble ourselves, in order that we may reassemble. */
725   /*
726      if the nonzero structure of submatrix B cannot change then we know that
727      no processor disassembled, thus we can skip this step
728   */
729   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
730     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
731     if (mat->was_assembled && !other_disassembled) {
732       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
733     }
734   }
735   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
736     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
737   }
738   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
739   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
740   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
741 
742   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
743 
744   aij->rowvalues = 0;
745 
746   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
747   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
748 
749   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
750   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
751     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
752     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
758 {
759   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
760   PetscErrorCode ierr;
761 
762   PetscFunctionBegin;
763   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
764   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
769 {
770   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
771   PetscInt      *lrows;
772   PetscInt       r, len;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   /* get locally owned rows */
777   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
778   /* fix right hand side if needed */
779   if (x && b) {
780     const PetscScalar *xx;
781     PetscScalar       *bb;
782 
783     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
784     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
785     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
786     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
788   }
789   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
790   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
791   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
792     PetscBool cong;
793     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
794     if (cong) A->congruentlayouts = 1;
795     else      A->congruentlayouts = 0;
796   }
797   if ((diag != 0.0) && A->congruentlayouts) {
798     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
799   } else if (diag != 0.0) {
800     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
801     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
802     for (r = 0; r < len; ++r) {
803       const PetscInt row = lrows[r] + A->rmap->rstart;
804       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
805     }
806     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
808   } else {
809     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
810   }
811   ierr = PetscFree(lrows);CHKERRQ(ierr);
812 
813   /* only change matrix nonzero state if pattern was allowed to be changed */
814   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
815     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
816     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
817   }
818   PetscFunctionReturn(0);
819 }
820 
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscMPIInt       n = A->rmap->n;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 
837   PetscFunctionBegin;
838   /* Create SF where leaves are input rows and roots are owned rows */
839   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
840   for (r = 0; r < n; ++r) lrows[r] = -1;
841   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
842   for (r = 0; r < N; ++r) {
843     const PetscInt idx   = rows[r];
844     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
847     }
848     rrows[r].rank  = p;
849     rrows[r].index = rows[r] - owners[p];
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
935 {
936   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
937   PetscErrorCode ierr;
938   PetscInt       nt;
939   VecScatter     Mvctx = a->Mvctx;
940 
941   PetscFunctionBegin;
942   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
943   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
944 
945   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
947   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
953 {
954   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
955   PetscErrorCode ierr;
956 
957   PetscFunctionBegin;
958   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
963 {
964   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
965   PetscErrorCode ierr;
966   VecScatter     Mvctx = a->Mvctx;
967 
968   PetscFunctionBegin;
969   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
970   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
971   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
972   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981   PetscBool      merged;
982 
983   PetscFunctionBegin;
984   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
985   /* do nondiagonal part */
986   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
987   if (!merged) {
988     /* send it on its way */
989     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990     /* do local part */
991     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
992     /* receive remote parts: note this assumes the values are not actually */
993     /* added in yy until the next line, */
994     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995   } else {
996     /* do local part */
997     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
998     /* send it on its way */
999     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1000     /* values actually were received in the Begin() but we need to call this nop */
1001     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1007 {
1008   MPI_Comm       comm;
1009   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1010   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1011   IS             Me,Notme;
1012   PetscErrorCode ierr;
1013   PetscInt       M,N,first,last,*notme,i;
1014   PetscMPIInt    size;
1015 
1016   PetscFunctionBegin;
1017   /* Easy test: symmetric diagonal block */
1018   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1019   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1020   if (!*f) PetscFunctionReturn(0);
1021   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1022   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1023   if (size == 1) PetscFunctionReturn(0);
1024 
1025   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1026   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1027   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1028   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1029   for (i=0; i<first; i++) notme[i] = i;
1030   for (i=last; i<M; i++) notme[i-last+first] = i;
1031   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1032   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1033   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1034   Aoff = Aoffs[0];
1035   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1036   Boff = Boffs[0];
1037   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1038   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1039   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1040   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1041   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1042   ierr = PetscFree(notme);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1047 {
1048   PetscErrorCode ierr;
1049 
1050   PetscFunctionBegin;
1051   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1058   PetscErrorCode ierr;
1059 
1060   PetscFunctionBegin;
1061   /* do nondiagonal part */
1062   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1063   /* send it on its way */
1064   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1065   /* do local part */
1066   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1067   /* receive remote parts */
1068   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 /*
1073   This only works correctly for square matrices where the subblock A->A is the
1074    diagonal block
1075 */
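/*
   A hedged usage sketch (not part of the PETSc source): for a square matrix A with matching row
   and column layouts the diagonal can be extracted into a conforming vector with

     Vec d;
     MatCreateVecs(A,NULL,&d);
     MatGetDiagonal(A,d);

   A and d are placeholder names; MatCreateVecs() creates d with the same row layout as A.
*/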
1076 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1077 {
1078   PetscErrorCode ierr;
1079   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1080 
1081   PetscFunctionBegin;
1082   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1083   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1084   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1089 {
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091   PetscErrorCode ierr;
1092 
1093   PetscFunctionBegin;
1094   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1095   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1100 {
1101   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105 #if defined(PETSC_USE_LOG)
1106   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1107 #endif
1108   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1109   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1110   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1111   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1112 #if defined(PETSC_USE_CTABLE)
1113   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1114 #else
1115   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1116 #endif
1117   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1118   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1119   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1120   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1121   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1122   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1123   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1124 
1125   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1134 #if defined(PETSC_HAVE_ELEMENTAL)
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1136 #endif
1137 #if defined(PETSC_HAVE_HYPRE)
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1140 #endif
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1147 {
1148   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1149   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1150   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1151   PetscErrorCode ierr;
1152   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1153   int            fd;
1154   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1155   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1156   PetscScalar    *column_values;
1157   PetscInt       message_count,flowcontrolcount;
1158   FILE           *file;
1159 
1160   PetscFunctionBegin;
1161   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1162   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1163   nz   = A->nz + B->nz;
1164   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1165   if (!rank) {
1166     header[0] = MAT_FILE_CLASSID;
1167     header[1] = mat->rmap->N;
1168     header[2] = mat->cmap->N;
1169 
1170     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1171     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1172     /* get largest number of rows any processor has */
1173     rlen  = mat->rmap->n;
1174     range = mat->rmap->range;
1175     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1176   } else {
1177     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1178     rlen = mat->rmap->n;
1179   }
1180 
1181   /* load up the local row counts */
1182   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1183   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1184 
1185   /* store the row lengths to the file */
1186   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1187   if (!rank) {
1188     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1189     for (i=1; i<size; i++) {
1190       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1191       rlen = range[i+1] - range[i];
1192       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1193       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1194     }
1195     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1196   } else {
1197     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1198     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1199     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1200   }
1201   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1202 
1203   /* load up the local column indices */
1204   nzmax = nz; /* process 0 needs enough space to hold the largest number of nonzeros on any process */
1205   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1206   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1207   cnt   = 0;
1208   for (i=0; i<mat->rmap->n; i++) {
1209     for (j=B->i[i]; j<B->i[i+1]; j++) {
1210       if ((col = garray[B->j[j]]) > cstart) break;
1211       column_indices[cnt++] = col;
1212     }
1213     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1214     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1215   }
1216   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1217 
1218   /* store the column indices to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     MPI_Status status;
1222     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1225       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1226       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1227       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1229     }
1230     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1231   } else {
1232     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1233     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1235     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1236   }
1237   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1238 
1239   /* load up the local column values */
1240   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1241   cnt  = 0;
1242   for (i=0; i<mat->rmap->n; i++) {
1243     for (j=B->i[i]; j<B->i[i+1]; j++) {
1244       if (garray[B->j[j]] > cstart) break;
1245       column_values[cnt++] = B->a[j];
1246     }
1247     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1248     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1249   }
1250   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1251 
1252   /* store the column values to the file */
1253   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1254   if (!rank) {
1255     MPI_Status status;
1256     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1257     for (i=1; i<size; i++) {
1258       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1259       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1260       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1261       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1263     }
1264     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1265   } else {
1266     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1267     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1270   }
1271   ierr = PetscFree(column_values);CHKERRQ(ierr);
1272 
1273   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1274   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #include <petscdraw.h>
1279 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1280 {
1281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1282   PetscErrorCode    ierr;
1283   PetscMPIInt       rank = aij->rank,size = aij->size;
1284   PetscBool         isdraw,iascii,isbinary;
1285   PetscViewer       sviewer;
1286   PetscViewerFormat format;
1287 
1288   PetscFunctionBegin;
1289   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1290   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1291   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1292   if (iascii) {
1293     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1294     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1295       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1296       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1297       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298       for (i=0; i<(PetscInt)size; i++) {
1299         nmax = PetscMax(nmax,nz[i]);
1300         nmin = PetscMin(nmin,nz[i]);
1301         navg += nz[i];
1302       }
1303       ierr = PetscFree(nz);CHKERRQ(ierr);
1304       navg = navg/size;
1305       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1306       PetscFunctionReturn(0);
1307     }
1308     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1309     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1310       MatInfo   info;
1311       PetscBool inodes;
1312 
1313       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1314       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1315       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1317       if (!inodes) {
1318         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1319                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1320       } else {
1321         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1322                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1323       }
1324       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1326       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1328       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1331       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1332       PetscFunctionReturn(0);
1333     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1334       PetscInt inodecount,inodelimit,*inodes;
1335       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1336       if (inodes) {
1337         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1338       } else {
1339         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1340       }
1341       PetscFunctionReturn(0);
1342     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1343       PetscFunctionReturn(0);
1344     }
1345   } else if (isbinary) {
1346     if (size == 1) {
1347       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1348       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1351     }
1352     PetscFunctionReturn(0);
1353   } else if (isdraw) {
1354     PetscDraw draw;
1355     PetscBool isnull;
1356     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1357     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1358     if (isnull) PetscFunctionReturn(0);
1359   }
1360 
1361   {
1362     /* assemble the entire matrix onto first processor. */
1363     Mat        A;
1364     Mat_SeqAIJ *Aloc;
1365     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1366     MatScalar  *a;
1367 
1368     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1369     if (!rank) {
1370       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1371     } else {
1372       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1373     }
1374     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1375     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1376     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1377     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1378     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1379 
1380     /* copy over the A part */
1381     Aloc = (Mat_SeqAIJ*)aij->A->data;
1382     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1383     row  = mat->rmap->rstart;
1384     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1385     for (i=0; i<m; i++) {
1386       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1387       row++;
1388       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1389     }
1390     aj = Aloc->j;
1391     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1392 
1393     /* copy over the B part */
1394     Aloc = (Mat_SeqAIJ*)aij->B->data;
1395     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1396     row  = mat->rmap->rstart;
1397     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1398     ct   = cols;
1399     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1400     for (i=0; i<m; i++) {
1401       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1402       row++;
1403       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1404     }
1405     ierr = PetscFree(ct);CHKERRQ(ierr);
1406     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1407     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1408     /*
1409        Everyone has to call to draw the matrix since the graphics waits are
1410        synchronized across all processors that share the PetscDraw object
1411     */
1412     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1413     if (!rank) {
1414       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1415       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1416     }
1417     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1418     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1419     ierr = MatDestroy(&A);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1425 {
1426   PetscErrorCode ierr;
1427   PetscBool      iascii,isdraw,issocket,isbinary;
1428 
1429   PetscFunctionBegin;
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1434   if (iascii || isdraw || isbinary || issocket) {
1435     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
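/*
   MatSOR_MPIAIJ applies SOR only within each process's diagonal block (true parallel SOR is not
   supported, see the error at the end of the routine).  In the "local" sweep branches the
   off-process part of x is gathered into mat->lvec with the Mvctx scatter, the right-hand side
   is updated as bb1 = bb - B*x, and the sequential SOR kernel of mat->A is applied to bb1; the
   SOR_EISENSTAT branch combines the same ingredients to implement the Eisenstat trick.
*/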
1440 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1441 {
1442   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1443   PetscErrorCode ierr;
1444   Vec            bb1 = 0;
1445   PetscBool      hasop;
1446 
1447   PetscFunctionBegin;
1448   if (flag == SOR_APPLY_UPPER) {
1449     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1450     PetscFunctionReturn(0);
1451   }
1452 
1453   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1454     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1455   }
1456 
1457   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1458     if (flag & SOR_ZERO_INITIAL_GUESS) {
1459       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1460       its--;
1461     }
1462 
1463     while (its--) {
1464       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466 
1467       /* update rhs: bb1 = bb - B*x */
1468       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1469       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1470 
1471       /* local sweep */
1472       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1473     }
1474   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1475     if (flag & SOR_ZERO_INITIAL_GUESS) {
1476       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1477       its--;
1478     }
1479     while (its--) {
1480       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1481       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482 
1483       /* update rhs: bb1 = bb - B*x */
1484       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1485       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1486 
1487       /* local sweep */
1488       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1489     }
1490   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1491     if (flag & SOR_ZERO_INITIAL_GUESS) {
1492       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1493       its--;
1494     }
1495     while (its--) {
1496       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498 
1499       /* update rhs: bb1 = bb - B*x */
1500       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1501       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1502 
1503       /* local sweep */
1504       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1505     }
1506   } else if (flag & SOR_EISENSTAT) {
1507     Vec xx1;
1508 
1509     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1510     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1511 
1512     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1513     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514     if (!mat->diag) {
1515       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1516       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1517     }
1518     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1519     if (hasop) {
1520       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1521     } else {
1522       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1523     }
1524     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1525 
1526     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1527 
1528     /* local sweep */
1529     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1530     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1531     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1532   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1533 
1534   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1535 
1536   matin->factorerrortype = mat->A->factorerrortype;
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1541 {
1542   Mat            aA,aB,Aperm;
1543   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1544   PetscScalar    *aa,*ba;
1545   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1546   PetscSF        rowsf,sf;
1547   IS             parcolp = NULL;
1548   PetscBool      done;
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1553   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1554   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1555   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1556 
1557   /* Invert row permutation to find out where my rows should go */
1558   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1559   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1560   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1561   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1562   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1563   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1564 
1565   /* Invert column permutation to find out where my columns should go */
1566   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1567   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1568   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1569   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1570   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1571   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1573 
1574   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1575   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1576   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1577 
1578   /* Find out where my gcols should go */
1579   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1580   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1581   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1582   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1583   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1584   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1585   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1586   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1587 
1588   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1589   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1590   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1591   for (i=0; i<m; i++) {
1592     PetscInt row = rdest[i],rowner;
1593     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1594     for (j=ai[i]; j<ai[i+1]; j++) {
1595       PetscInt cowner,col = cdest[aj[j]];
1596       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1597       if (rowner == cowner) dnnz[i]++;
1598       else onnz[i]++;
1599     }
1600     for (j=bi[i]; j<bi[i+1]; j++) {
1601       PetscInt cowner,col = gcdest[bj[j]];
1602       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1603       if (rowner == cowner) dnnz[i]++;
1604       else onnz[i]++;
1605     }
1606   }
1607   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1608   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1609   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1610   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1611   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1612 
1613   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1614   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1615   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1616   for (i=0; i<m; i++) {
1617     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1618     PetscInt j0,rowlen;
1619     rowlen = ai[i+1] - ai[i];
1620     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could exceed m, the length of the repurposed acols/bcols arrays, so process the row in batches of at most m */
1621       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1622       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1623     }
1624     rowlen = bi[i+1] - bi[i];
1625     for (j0=j=0; j<rowlen; j0=j) {
1626       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1627       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1628     }
1629   }
1630   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1631   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1633   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1634   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1635   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1636   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1637   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1638   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1639   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1640   *B = Aperm;
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1645 {
1646   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1647   PetscErrorCode ierr;
1648 
1649   PetscFunctionBegin;
1650   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1651   if (ghosts) *ghosts = aij->garray;
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1656 {
1657   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1658   Mat            A    = mat->A,B = mat->B;
1659   PetscErrorCode ierr;
1660   PetscReal      isend[5],irecv[5];
1661 
1662   PetscFunctionBegin;
1663   info->block_size = 1.0;
1664   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1665 
1666   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1667   isend[3] = info->memory;  isend[4] = info->mallocs;
1668 
1669   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1670 
1671   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1672   isend[3] += info->memory;  isend[4] += info->mallocs;
1673   if (flag == MAT_LOCAL) {
1674     info->nz_used      = isend[0];
1675     info->nz_allocated = isend[1];
1676     info->nz_unneeded  = isend[2];
1677     info->memory       = isend[3];
1678     info->mallocs      = isend[4];
1679   } else if (flag == MAT_GLOBAL_MAX) {
1680     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1681 
1682     info->nz_used      = irecv[0];
1683     info->nz_allocated = irecv[1];
1684     info->nz_unneeded  = irecv[2];
1685     info->memory       = irecv[3];
1686     info->mallocs      = irecv[4];
1687   } else if (flag == MAT_GLOBAL_SUM) {
1688     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1689 
1690     info->nz_used      = irecv[0];
1691     info->nz_allocated = irecv[1];
1692     info->nz_unneeded  = irecv[2];
1693     info->memory       = irecv[3];
1694     info->mallocs      = irecv[4];
1695   }
1696   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1697   info->fill_ratio_needed = 0;
1698   info->factor_mallocs    = 0;
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1703 {
1704   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1705   PetscErrorCode ierr;
1706 
1707   PetscFunctionBegin;
1708   switch (op) {
1709   case MAT_NEW_NONZERO_LOCATIONS:
1710   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1711   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1712   case MAT_KEEP_NONZERO_PATTERN:
1713   case MAT_NEW_NONZERO_LOCATION_ERR:
1714   case MAT_USE_INODES:
1715   case MAT_IGNORE_ZERO_ENTRIES:
1716     MatCheckPreallocated(A,1);
1717     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1718     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1719     break;
1720   case MAT_ROW_ORIENTED:
1721     MatCheckPreallocated(A,1);
1722     a->roworiented = flg;
1723 
1724     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1725     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1726     break;
1727   case MAT_NEW_DIAGONALS:
1728     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1729     break;
1730   case MAT_IGNORE_OFF_PROC_ENTRIES:
1731     a->donotstash = flg;
1732     break;
1733   case MAT_SPD:
1734     A->spd_set = PETSC_TRUE;
1735     A->spd     = flg;
1736     if (flg) {
1737       A->symmetric                  = PETSC_TRUE;
1738       A->structurally_symmetric     = PETSC_TRUE;
1739       A->symmetric_set              = PETSC_TRUE;
1740       A->structurally_symmetric_set = PETSC_TRUE;
1741     }
1742     break;
1743   case MAT_SYMMETRIC:
1744     MatCheckPreallocated(A,1);
1745     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1746     break;
1747   case MAT_STRUCTURALLY_SYMMETRIC:
1748     MatCheckPreallocated(A,1);
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_HERMITIAN:
1752     MatCheckPreallocated(A,1);
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     break;
1755   case MAT_SYMMETRY_ETERNAL:
1756     MatCheckPreallocated(A,1);
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_SUBMAT_SINGLEIS:
1760     A->submat_singleis = flg;
1761     break;
1762   case MAT_STRUCTURE_ONLY:
1763     /* The option is handled directly by MatSetOption() */
1764     break;
1765   default:
1766     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1767   }
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1772 {
1773   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1774   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1775   PetscErrorCode ierr;
1776   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1777   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1778   PetscInt       *cmap,*idx_p;
1779 
1780   PetscFunctionBegin;
1781   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1782   mat->getrowactive = PETSC_TRUE;
1783 
1784   if (!mat->rowvalues && (idx || v)) {
1785     /*
1786         allocate enough space to hold information from the longest row.
1787     */
1788     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1789     PetscInt   max = 1,tmp;
1790     for (i=0; i<matin->rmap->n; i++) {
1791       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1792       if (max < tmp) max = tmp;
1793     }
1794     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1795   }
1796 
1797   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1798   lrow = row - rstart;
1799 
1800   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1801   if (!v)   {pvA = 0; pvB = 0;}
1802   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1803   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1804   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1805   nztot = nzA + nzB;
1806 
1807   cmap = mat->garray;
1808   if (v  || idx) {
1809     if (nztot) {
1810       /* Sort by increasing column numbers, assuming A and B already sorted */
1811       PetscInt imark = -1;
1812       if (v) {
1813         *v = v_p = mat->rowvalues;
1814         for (i=0; i<nzB; i++) {
1815           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1816           else break;
1817         }
1818         imark = i;
1819         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1820         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1821       }
1822       if (idx) {
1823         *idx = idx_p = mat->rowindices;
1824         if (imark > -1) {
1825           for (i=0; i<imark; i++) {
1826             idx_p[i] = cmap[cworkB[i]];
1827           }
1828         } else {
1829           for (i=0; i<nzB; i++) {
1830             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1831             else break;
1832           }
1833           imark = i;
1834         }
1835         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1836         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1837       }
1838     } else {
1839       if (idx) *idx = 0;
1840       if (v)   *v   = 0;
1841     }
1842   }
1843   *nz  = nztot;
1844   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1845   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1850 {
1851   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1852 
1853   PetscFunctionBegin;
1854   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1855   aij->getrowactive = PETSC_FALSE;
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1860 {
1861   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1862   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1863   PetscErrorCode ierr;
1864   PetscInt       i,j,cstart = mat->cmap->rstart;
1865   PetscReal      sum = 0.0;
1866   MatScalar      *v;
1867 
1868   PetscFunctionBegin;
1869   if (aij->size == 1) {
1870     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1871   } else {
1872     if (type == NORM_FROBENIUS) {
1873       v = amat->a;
1874       for (i=0; i<amat->nz; i++) {
1875         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1876       }
1877       v = bmat->a;
1878       for (i=0; i<bmat->nz; i++) {
1879         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1880       }
1881       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1882       *norm = PetscSqrtReal(*norm);
1883       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1884     } else if (type == NORM_1) { /* max column norm */
1885       PetscReal *tmp,*tmp2;
1886       PetscInt  *jj,*garray = aij->garray;
1887       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1888       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1889       *norm = 0.0;
1890       v     = amat->a; jj = amat->j;
1891       for (j=0; j<amat->nz; j++) {
1892         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1893       }
1894       v = bmat->a; jj = bmat->j;
1895       for (j=0; j<bmat->nz; j++) {
1896         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1897       }
1898       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1899       for (j=0; j<mat->cmap->N; j++) {
1900         if (tmp2[j] > *norm) *norm = tmp2[j];
1901       }
1902       ierr = PetscFree(tmp);CHKERRQ(ierr);
1903       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1904       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1905     } else if (type == NORM_INFINITY) { /* max row norm */
1906       PetscReal ntemp = 0.0;
1907       for (j=0; j<aij->A->rmap->n; j++) {
1908         v   = amat->a + amat->i[j];
1909         sum = 0.0;
1910         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1911           sum += PetscAbsScalar(*v); v++;
1912         }
1913         v = bmat->a + bmat->i[j];
1914         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1915           sum += PetscAbsScalar(*v); v++;
1916         }
1917         if (sum > ntemp) ntemp = sum;
1918       }
1919       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1920       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1921     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1922   }
1923   PetscFunctionReturn(0);
1924 }
1925 
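/*
   MatTranspose_MPIAIJ forms the transpose explicitly: after preallocating B from the column
   counts of A (with a PetscSF reduction supplying the off-diagonal counts), each local row of A
   is inserted as a column of B by calling MatSetValues() with the row and column index arguments
   swapped; the usual parallel assembly then moves entries to their owning processes.
*/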
1926 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1927 {
1928   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1929   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1930   PetscErrorCode ierr;
1931   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1932   PetscInt       cstart = A->cmap->rstart,ncol;
1933   Mat            B;
1934   MatScalar      *array;
1935 
1936   PetscFunctionBegin;
1937   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1938 
1939   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1940   ai = Aloc->i; aj = Aloc->j;
1941   bi = Bloc->i; bj = Bloc->j;
1942   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1943     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1944     PetscSFNode          *oloc;
1945     PETSC_UNUSED PetscSF sf;
1946 
1947     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1948     /* compute d_nnz for preallocation */
1949     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1950     for (i=0; i<ai[ma]; i++) {
1951       d_nnz[aj[i]]++;
1952       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1953     }
1954     /* compute local off-diagonal contributions */
1955     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1956     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1957     /* map those to global */
1958     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1959     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1960     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1961     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1962     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1963     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1964     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1965 
1966     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1967     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1968     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1969     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1970     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1971     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1972   } else {
1973     B    = *matout;
1974     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1975     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1976   }
1977 
1978   /* copy over the A part */
1979   array = Aloc->a;
1980   row   = A->rmap->rstart;
1981   for (i=0; i<ma; i++) {
1982     ncol = ai[i+1]-ai[i];
1983     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1984     row++;
1985     array += ncol; aj += ncol;
1986   }
1987   aj = Aloc->j;
1988   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1989 
1990   /* copy over the B part */
1991   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1992   array = Bloc->a;
1993   row   = A->rmap->rstart;
1994   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1995   cols_tmp = cols;
1996   for (i=0; i<mb; i++) {
1997     ncol = bi[i+1]-bi[i];
1998     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1999     row++;
2000     array += ncol; cols_tmp += ncol;
2001   }
2002   ierr = PetscFree(cols);CHKERRQ(ierr);
2003 
2004   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2005   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2006   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2007     *matout = B;
2008   } else {
2009     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2010   }
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2015 {
2016   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2017   Mat            a    = aij->A,b = aij->B;
2018   PetscErrorCode ierr;
2019   PetscInt       s1,s2,s3;
2020 
2021   PetscFunctionBegin;
2022   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2023   if (rr) {
2024     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2025     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2026     /* Overlap communication with computation. */
2027     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2028   }
2029   if (ll) {
2030     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2031     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2032     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2033   }
2034   /* scale the diagonal block */
2035   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2036 
2037   if (rr) {
2038     /* Do a scatter end and then right scale the off-diagonal block */
2039     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2040     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2041   }
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2046 {
2047   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2048   PetscErrorCode ierr;
2049 
2050   PetscFunctionBegin;
2051   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2056 {
2057   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2058   Mat            a,b,c,d;
2059   PetscBool      flg;
2060   PetscErrorCode ierr;
2061 
2062   PetscFunctionBegin;
2063   a = matA->A; b = matA->B;
2064   c = matB->A; d = matB->B;
2065 
2066   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2067   if (flg) {
2068     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2069   }
2070   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2075 {
2076   PetscErrorCode ierr;
2077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2078   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2079 
2080   PetscFunctionBegin;
2081   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2082   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2083     /* because of the column compression in the off-process part of the matrix a->B,
2084        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2085        directly on the two parts (see the illustrative note after this function). If need
2086        be, a copy more efficient than MatCopy_Basic() could be provided by first
2087        uncompressing the a->B matrices and then copying the submatrices */
2088     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2089   } else {
2090     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2091     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2092   }
2093   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
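/*
   Illustrative note on the column compression mentioned in MatCopy_MPIAIJ() above (made-up
   numbers, not taken from any particular run): the off-diagonal block a->B stores only the
   columns in which this process actually has nonzeros, and a->garray maps those compressed local
   column indices back to global columns.  With cstart = 4 and cend = 8, a process whose
   off-process nonzeros lie in global columns {1, 9, 12} has a 3-column a->B with
   garray = {1, 9, 12}; a matrix with the same global sizes but off-process nonzeros in {0, 1, 9}
   has a different garray, so copying the raw arrays of one B block into the other would scramble
   the column meaning even when the global nonzero patterns overlap.
*/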
2096 
2097 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2098 {
2099   PetscErrorCode ierr;
2100 
2101   PetscFunctionBegin;
2102   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 /*
2107    Computes the number of nonzeros per row needed for preallocation when X and Y
2108    have different nonzero structure.
2109 */
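/*
   A small worked example of the counting loop below (illustrative numbers only): if row i of X
   has global columns {1, 4, 7} and row i of Y has global columns {2, 4, 9}, the merge counts the
   Y-only column 2, the shared column 4 once, the X-only columns 1 and 7, and the trailing Y-only
   column 9, giving nnz[i] = 5, the size of the union of the two column sets.
*/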
2110 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2111 {
2112   PetscInt       i,j,k,nzx,nzy;
2113 
2114   PetscFunctionBegin;
2115   /* Set the number of nonzeros in the new matrix */
2116   for (i=0; i<m; i++) {
2117     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2118     nzx = xi[i+1] - xi[i];
2119     nzy = yi[i+1] - yi[i];
2120     nnz[i] = 0;
2121     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2122       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2123       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2124       nnz[i]++;
2125     }
2126     for (; k<nzy; k++) nnz[i]++;
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2132 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2133 {
2134   PetscErrorCode ierr;
2135   PetscInt       m = Y->rmap->N;
2136   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2137   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2138 
2139   PetscFunctionBegin;
2140   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2145 {
2146   PetscErrorCode ierr;
2147   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2148   PetscBLASInt   bnz,one=1;
2149   Mat_SeqAIJ     *x,*y;
2150 
2151   PetscFunctionBegin;
2152   if (str == SAME_NONZERO_PATTERN) {
2153     PetscScalar alpha = a;
2154     x    = (Mat_SeqAIJ*)xx->A->data;
2155     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2156     y    = (Mat_SeqAIJ*)yy->A->data;
2157     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2158     x    = (Mat_SeqAIJ*)xx->B->data;
2159     y    = (Mat_SeqAIJ*)yy->B->data;
2160     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2161     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2162     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2163   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2164     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2165   } else {
2166     Mat      B;
2167     PetscInt *nnz_d,*nnz_o;
2168     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2169     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2170     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2171     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2172     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2173     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2174     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2175     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2176     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2177     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2178     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2179     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2180     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2181     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2182   }
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2187 
2188 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2189 {
2190 #if defined(PETSC_USE_COMPLEX)
2191   PetscErrorCode ierr;
2192   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2193 
2194   PetscFunctionBegin;
2195   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2196   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2197 #else
2198   PetscFunctionBegin;
2199 #endif
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207 
2208   PetscFunctionBegin;
2209   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2210   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2215 {
2216   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2217   PetscErrorCode ierr;
2218 
2219   PetscFunctionBegin;
2220   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2221   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2226 {
2227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2228   PetscErrorCode ierr;
2229   PetscInt       i,*idxb = 0;
2230   PetscScalar    *va,*vb;
2231   Vec            vtmp;
2232 
2233   PetscFunctionBegin;
2234   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2235   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2236   if (idx) {
2237     for (i=0; i<A->rmap->n; i++) {
2238       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2239     }
2240   }
2241 
2242   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2243   if (idx) {
2244     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2245   }
2246   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2247   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2248 
2249   for (i=0; i<A->rmap->n; i++) {
2250     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2251       va[i] = vb[i];
2252       if (idx) idx[i] = a->garray[idxb[i]];
2253     }
2254   }
2255 
2256   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2257   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2258   ierr = PetscFree(idxb);CHKERRQ(ierr);
2259   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2264 {
2265   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2266   PetscErrorCode ierr;
2267   PetscInt       i,*idxb = 0;
2268   PetscScalar    *va,*vb;
2269   Vec            vtmp;
2270 
2271   PetscFunctionBegin;
2272   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2273   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2274   if (idx) {
2275     for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2276       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2277     }
2278   }
2279 
2280   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2281   if (idx) {
2282     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2283   }
2284   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2285   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2286 
2287   for (i=0; i<A->rmap->n; i++) {
2288     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2289       va[i] = vb[i];
2290       if (idx) idx[i] = a->garray[idxb[i]];
2291     }
2292   }
2293 
2294   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2295   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2296   ierr = PetscFree(idxb);CHKERRQ(ierr);
2297   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2302 {
2303   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2304   PetscInt       n      = A->rmap->n;
2305   PetscInt       cstart = A->cmap->rstart;
2306   PetscInt       *cmap  = mat->garray;
2307   PetscInt       *diagIdx, *offdiagIdx;
2308   Vec            diagV, offdiagV;
2309   PetscScalar    *a, *diagA, *offdiagA;
2310   PetscInt       r;
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2315   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2316   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2317   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2318   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2319   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2320   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2321   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2322   for (r = 0; r < n; ++r) {
2323     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2324       a[r]   = diagA[r];
2325       idx[r] = cstart + diagIdx[r];
2326     } else {
2327       a[r]   = offdiagA[r];
2328       idx[r] = cmap[offdiagIdx[r]];
2329     }
2330   }
2331   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2332   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2333   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2334   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2335   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2336   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2343   PetscInt       n      = A->rmap->n;
2344   PetscInt       cstart = A->cmap->rstart;
2345   PetscInt       *cmap  = mat->garray;
2346   PetscInt       *diagIdx, *offdiagIdx;
2347   Vec            diagV, offdiagV;
2348   PetscScalar    *a, *diagA, *offdiagA;
2349   PetscInt       r;
2350   PetscErrorCode ierr;
2351 
2352   PetscFunctionBegin;
2353   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2354   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2355   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2356   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2357   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2359   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2360   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2361   for (r = 0; r < n; ++r) {
2362     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2363       a[r]   = diagA[r];
2364       idx[r] = cstart + diagIdx[r];
2365     } else {
2366       a[r]   = offdiagA[r];
2367       idx[r] = cmap[offdiagIdx[r]];
2368     }
2369   }
2370   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2371   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2373   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2374   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2375   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2380 {
2381   PetscErrorCode ierr;
2382   Mat            *dummy;
2383 
2384   PetscFunctionBegin;
2385   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2386   *newmat = *dummy;
2387   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2392 {
2393   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2398   A->factorerrortype = a->A->factorerrortype;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2403 {
2404   PetscErrorCode ierr;
2405   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2406 
2407   PetscFunctionBegin;
2408   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2409   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2410   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2411   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2416 {
2417   PetscFunctionBegin;
2418   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2419   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 /*@
2424    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2425 
2426    Collective on Mat
2427 
2428    Input Parameters:
2429 +    A - the matrix
2430 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2431 
2432  Level: advanced
2433 
2434 @*/
2435 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2436 {
2437   PetscErrorCode       ierr;
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2441   PetscFunctionReturn(0);
2442 }
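/*
   Illustrative usage sketch (the names A, nis, is, and ov are placeholders, not taken from the
   library):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);

   The same choice can be made from the options database with -mat_increase_overlap_scalable,
   which is handled by MatSetFromOptions_MPIAIJ() below.
*/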
2443 
2444 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2445 {
2446   PetscErrorCode       ierr;
2447   PetscBool            sc = PETSC_FALSE,flg;
2448 
2449   PetscFunctionBegin;
2450   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2451   ierr = PetscObjectOptionsBegin((PetscObject)A);
2452     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2453     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2454     if (flg) {
2455       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2456     }
2457   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2462 {
2463   PetscErrorCode ierr;
2464   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2465   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2466 
2467   PetscFunctionBegin;
2468   if (!Y->preallocated) {
2469     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2470   } else if (!aij->nz) {
2471     PetscInt nonew = aij->nonew;
2472     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2473     aij->nonew = nonew;
2474   }
2475   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2480 {
2481   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2486   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2487   if (d) {
2488     PetscInt rstart;
2489     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2490     *d += rstart;
2491 
2492   }
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 
2497 /* -------------------------------------------------------------------*/
2498 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2499                                        MatGetRow_MPIAIJ,
2500                                        MatRestoreRow_MPIAIJ,
2501                                        MatMult_MPIAIJ,
2502                                 /* 4*/ MatMultAdd_MPIAIJ,
2503                                        MatMultTranspose_MPIAIJ,
2504                                        MatMultTransposeAdd_MPIAIJ,
2505                                        0,
2506                                        0,
2507                                        0,
2508                                 /*10*/ 0,
2509                                        0,
2510                                        0,
2511                                        MatSOR_MPIAIJ,
2512                                        MatTranspose_MPIAIJ,
2513                                 /*15*/ MatGetInfo_MPIAIJ,
2514                                        MatEqual_MPIAIJ,
2515                                        MatGetDiagonal_MPIAIJ,
2516                                        MatDiagonalScale_MPIAIJ,
2517                                        MatNorm_MPIAIJ,
2518                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2519                                        MatAssemblyEnd_MPIAIJ,
2520                                        MatSetOption_MPIAIJ,
2521                                        MatZeroEntries_MPIAIJ,
2522                                 /*24*/ MatZeroRows_MPIAIJ,
2523                                        0,
2524                                        0,
2525                                        0,
2526                                        0,
2527                                 /*29*/ MatSetUp_MPIAIJ,
2528                                        0,
2529                                        0,
2530                                        MatGetDiagonalBlock_MPIAIJ,
2531                                        0,
2532                                 /*34*/ MatDuplicate_MPIAIJ,
2533                                        0,
2534                                        0,
2535                                        0,
2536                                        0,
2537                                 /*39*/ MatAXPY_MPIAIJ,
2538                                        MatCreateSubMatrices_MPIAIJ,
2539                                        MatIncreaseOverlap_MPIAIJ,
2540                                        MatGetValues_MPIAIJ,
2541                                        MatCopy_MPIAIJ,
2542                                 /*44*/ MatGetRowMax_MPIAIJ,
2543                                        MatScale_MPIAIJ,
2544                                        MatShift_MPIAIJ,
2545                                        MatDiagonalSet_MPIAIJ,
2546                                        MatZeroRowsColumns_MPIAIJ,
2547                                 /*49*/ MatSetRandom_MPIAIJ,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2553                                        0,
2554                                        MatSetUnfactored_MPIAIJ,
2555                                        MatPermute_MPIAIJ,
2556                                        0,
2557                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2558                                        MatDestroy_MPIAIJ,
2559                                        MatView_MPIAIJ,
2560                                        0,
2561                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2562                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2563                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2564                                        0,
2565                                        0,
2566                                        0,
2567                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2568                                        MatGetRowMinAbs_MPIAIJ,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                 /*75*/ MatFDColoringApply_AIJ,
2574                                        MatSetFromOptions_MPIAIJ,
2575                                        0,
2576                                        0,
2577                                        MatFindZeroDiagonals_MPIAIJ,
2578                                 /*80*/ 0,
2579                                        0,
2580                                        0,
2581                                 /*83*/ MatLoad_MPIAIJ,
2582                                        MatIsSymmetric_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2588                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2589                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2590                                        MatPtAP_MPIAIJ_MPIAIJ,
2591                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2592                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2593                                        0,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                 /*99*/ 0,
2598                                        0,
2599                                        0,
2600                                        MatConjugate_MPIAIJ,
2601                                        0,
2602                                 /*104*/MatSetValuesRow_MPIAIJ,
2603                                        MatRealPart_MPIAIJ,
2604                                        MatImaginaryPart_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                 /*109*/0,
2608                                        0,
2609                                        MatGetRowMin_MPIAIJ,
2610                                        0,
2611                                        MatMissingDiagonal_MPIAIJ,
2612                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2613                                        0,
2614                                        MatGetGhosts_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                 /*119*/0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                        MatGetMultiProcBlock_MPIAIJ,
2622                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2623                                        MatGetColumnNorms_MPIAIJ,
2624                                        MatInvertBlockDiagonal_MPIAIJ,
2625                                        0,
2626                                        MatCreateSubMatricesMPI_MPIAIJ,
2627                                 /*129*/0,
2628                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2629                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2630                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2631                                        0,
2632                                 /*134*/0,
2633                                        0,
2634                                        MatRARt_MPIAIJ_MPIAIJ,
2635                                        0,
2636                                        0,
2637                                 /*139*/MatSetBlockSizes_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        MatFDColoringSetUp_MPIXAIJ,
2641                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2642                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2643 };
2644 
2645 /* ----------------------------------------------------------------------------------------*/
2646 
2647 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2648 {
2649   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2650   PetscErrorCode ierr;
2651 
2652   PetscFunctionBegin;
2653   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2654   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2659 {
2660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2661   PetscErrorCode ierr;
2662 
2663   PetscFunctionBegin;
2664   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2665   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2666   PetscFunctionReturn(0);
2667 }
2668 
2669 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2670 {
2671   Mat_MPIAIJ     *b;
2672   PetscErrorCode ierr;
2673 
2674   PetscFunctionBegin;
2675   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2676   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2677   b = (Mat_MPIAIJ*)B->data;
2678 
2679 #if defined(PETSC_USE_CTABLE)
2680   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2681 #else
2682   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2683 #endif
2684   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2685   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2686   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2687 
2688   /* Because B will have been resized, we simply destroy it and create a new one each time */
2689   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2690   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2691   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2692   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2693   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2694   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2695 
2696   if (!B->preallocated) {
2697     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2698     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2699     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2700     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2701     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2702   }
2703 
2704   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2705   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2706   B->preallocated  = PETSC_TRUE;
2707   B->was_assembled = PETSC_FALSE;
2708   B->assembled     = PETSC_FALSE;
2709   PetscFunctionReturn(0);
2710 }
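
/*
   A minimal preallocation sketch (an illustration only, not part of this file's API):
   for a tridiagonal matrix distributed one row per process, each row has at most one
   nonzero in the diagonal block and two in the off-diagonal block, so

     ierr = MatMPIAIJSetPreallocation(A,1,NULL,2,NULL);CHKERRQ(ierr);

   reserves sufficient space on every process.
*/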
2711 
2712 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2713 {
2714   Mat_MPIAIJ     *b;
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2719   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2720   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2721   b = (Mat_MPIAIJ*)B->data;
2722 
2723 #if defined(PETSC_USE_CTABLE)
2724   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2725 #else
2726   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2727 #endif
2728   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2729   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2730   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2731 
2732   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2733   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2734   B->preallocated  = PETSC_TRUE;
2735   B->was_assembled = PETSC_FALSE;
2736   B->assembled = PETSC_FALSE;
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2741 {
2742   Mat            mat;
2743   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   *newmat = 0;
2748   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2749   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2750   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2751   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2752   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2753   a       = (Mat_MPIAIJ*)mat->data;
2754 
2755   mat->factortype   = matin->factortype;
2756   mat->assembled    = PETSC_TRUE;
2757   mat->insertmode   = NOT_SET_VALUES;
2758   mat->preallocated = PETSC_TRUE;
2759 
2760   a->size         = oldmat->size;
2761   a->rank         = oldmat->rank;
2762   a->donotstash   = oldmat->donotstash;
2763   a->roworiented  = oldmat->roworiented;
2764   a->rowindices   = 0;
2765   a->rowvalues    = 0;
2766   a->getrowactive = PETSC_FALSE;
2767 
2768   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2769   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2770 
2771   if (oldmat->colmap) {
2772 #if defined(PETSC_USE_CTABLE)
2773     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2774 #else
2775     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2776     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2777     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2778 #endif
2779   } else a->colmap = 0;
2780   if (oldmat->garray) {
2781     PetscInt len;
2782     len  = oldmat->B->cmap->n;
2783     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2784     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2785     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2786   } else a->garray = 0;
2787 
2788   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2789   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2790   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2791   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2792 
2793   if (oldmat->Mvctx_mpi1) {
2794     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2795     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2796   }
2797 
2798   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2799   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2800   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2801   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2802   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2803   *newmat = mat;
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2808 {
2809   PetscScalar    *vals,*svals;
2810   MPI_Comm       comm;
2811   PetscErrorCode ierr;
2812   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2813   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2814   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2815   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2816   PetscInt       cend,cstart,n,*rowners;
2817   int            fd;
2818   PetscInt       bs = newMat->rmap->bs;
2819 
2820   PetscFunctionBegin;
2821   /* force binary viewer to load .info file if it has not yet done so */
2822   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2823   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2824   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2825   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2826   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2827   if (!rank) {
2828     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2829     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2830     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2831   }
2832 
2833   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2834   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2835   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2836   if (bs < 0) bs = 1;
2837 
2838   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2839   M    = header[1]; N = header[2];
2840 
2841   /* If global sizes are set, check that they are consistent with those given in the file */
2842   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2843   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2844 
2845   /* determine ownership of all (block) rows */
2846   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2847   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2848   else m = newMat->rmap->n; /* Set by user */
2849 
2850   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2851   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2852 
2853   /* First process needs enough room for process with most rows */
2854   if (!rank) {
2855     mmax = rowners[1];
2856     for (i=2; i<=size; i++) {
2857       mmax = PetscMax(mmax, rowners[i]);
2858     }
2859   } else mmax = -1;             /* unused, but compilers complain */
2860 
2861   rowners[0] = 0;
2862   for (i=2; i<=size; i++) {
2863     rowners[i] += rowners[i-1];
2864   }
2865   rstart = rowners[rank];
2866   rend   = rowners[rank+1];
2867 
2868   /* distribute row lengths to all processors */
2869   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2870   if (!rank) {
2871     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2872     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2873     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2874     for (j=0; j<m; j++) {
2875       procsnz[0] += ourlens[j];
2876     }
2877     for (i=1; i<size; i++) {
2878       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2879       /* calculate the number of nonzeros on each processor */
2880       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2881         procsnz[i] += rowlengths[j];
2882       }
2883       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2884     }
2885     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2886   } else {
2887     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2888   }
2889 
2890   if (!rank) {
2891     /* determine max buffer needed and allocate it */
2892     maxnz = 0;
2893     for (i=0; i<size; i++) {
2894       maxnz = PetscMax(maxnz,procsnz[i]);
2895     }
2896     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2897 
2898     /* read in my part of the matrix column indices  */
2899     nz   = procsnz[0];
2900     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2901     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2902 
2903     /* read in everyone else's and ship off */
2904     for (i=1; i<size; i++) {
2905       nz   = procsnz[i];
2906       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2907       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2908     }
2909     ierr = PetscFree(cols);CHKERRQ(ierr);
2910   } else {
2911     /* determine buffer space needed for message */
2912     nz = 0;
2913     for (i=0; i<m; i++) {
2914       nz += ourlens[i];
2915     }
2916     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2917 
2918     /* receive message of column indices*/
2919     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2920   }
2921 
2922   /* determine column ownership if matrix is not square */
2923   if (N != M) {
2924     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2925     else n = newMat->cmap->n;
2926     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2927     cstart = cend - n;
2928   } else {
2929     cstart = rstart;
2930     cend   = rend;
2931     n      = cend - cstart;
2932   }
2933 
2934   /* loop over local rows, determining number of off diagonal entries */
2935   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2936   jj   = 0;
2937   for (i=0; i<m; i++) {
2938     for (j=0; j<ourlens[i]; j++) {
2939       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2940       jj++;
2941     }
2942   }
2943 
2944   for (i=0; i<m; i++) {
2945     ourlens[i] -= offlens[i];
2946   }
2947   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2948 
2949   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2950 
2951   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2952 
2953   for (i=0; i<m; i++) {
2954     ourlens[i] += offlens[i];
2955   }
2956 
2957   if (!rank) {
2958     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2959 
2960     /* read in my part of the matrix numerical values  */
2961     nz   = procsnz[0];
2962     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2963 
2964     /* insert into matrix */
2965     jj      = rstart;
2966     smycols = mycols;
2967     svals   = vals;
2968     for (i=0; i<m; i++) {
2969       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2970       smycols += ourlens[i];
2971       svals   += ourlens[i];
2972       jj++;
2973     }
2974 
2975     /* read in other processors and ship out */
2976     for (i=1; i<size; i++) {
2977       nz   = procsnz[i];
2978       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2979       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2980     }
2981     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2982   } else {
2983     /* receive numeric values */
2984     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2985 
2986     /* receive message of values*/
2987     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2988 
2989     /* insert into matrix */
2990     jj      = rstart;
2991     smycols = mycols;
2992     svals   = vals;
2993     for (i=0; i<m; i++) {
2994       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2995       smycols += ourlens[i];
2996       svals   += ourlens[i];
2997       jj++;
2998     }
2999   }
3000   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3001   ierr = PetscFree(vals);CHKERRQ(ierr);
3002   ierr = PetscFree(mycols);CHKERRQ(ierr);
3003   ierr = PetscFree(rowners);CHKERRQ(ierr);
3004   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3005   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 /* Not scalable because of ISAllGather() unless getting all columns. */
3010 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3011 {
3012   PetscErrorCode ierr;
3013   IS             iscol_local;
3014   PetscBool      isstride;
3015   PetscMPIInt    lisstride=0,gisstride;
3016 
3017   PetscFunctionBegin;
3018   /* check if we are grabbing all columns*/
3019   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3020 
3021   if (isstride) {
3022     PetscInt  start,len,mstart,mlen;
3023     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3024     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3025     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3026     if (mstart == start && mlen-mstart == len) lisstride = 1;
3027   }
3028 
3029   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3030   if (gisstride) {
3031     PetscInt N;
3032     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3033     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3034     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3035     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3036   } else {
3037     PetscInt cbs;
3038     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3039     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3040     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3041   }
3042 
3043   *isseq = iscol_local;
3044   PetscFunctionReturn(0);
3045 }
3046 
3047 /*
3048  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3049  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3050 
3051  Input Parameters:
3052    mat - matrix
3053    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3054            i.e., mat->rstart <= isrow[i] < mat->rend
3055    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3056            i.e., mat->cstart <= iscol[i] < mat->cend
3057  Output Parameters:
3058    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3059    iscol_o - sequential column index set for retrieving mat->B
3060    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3061  */
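/*
   A small illustration of these outputs (hypothetical data, for orientation only):
   if iscol globally selects columns {0,3,7,9} and this process' off-diagonal block B
   touches global columns 7 and 9, then iscol_o selects the two local B columns that
   correspond to 7 and 9, and garray = {2,3}, i.e. the positions of those columns
   within iscol.
*/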
3062 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3063 {
3064   PetscErrorCode ierr;
3065   Vec            x,cmap;
3066   const PetscInt *is_idx;
3067   PetscScalar    *xarray,*cmaparray;
3068   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3069   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3070   Mat            B=a->B;
3071   Vec            lvec=a->lvec,lcmap;
3072   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3073   MPI_Comm       comm;
3074   VecScatter     Mvctx=a->Mvctx;
3075 
3076   PetscFunctionBegin;
3077   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3078   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3079 
3080   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3081   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3082   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3083   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3084   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3085 
3086   /* Get start indices */
3087   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3088   isstart -= ncols;
3089   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3090 
3091   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3092   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3093   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3094   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3095   for (i=0; i<ncols; i++) {
3096     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3097     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3098     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3099   }
3100   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3101   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3102   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3103 
3104   /* Get iscol_d */
3105   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3106   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3107   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3108 
3109   /* Get isrow_d */
3110   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3111   rstart = mat->rmap->rstart;
3112   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3113   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3114   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3115   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3116 
3117   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3118   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3119   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3120 
3121   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3122   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3123   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3124 
3125   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3126 
3127   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3128   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3129 
3130   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3131   /* off-process column indices */
3132   count = 0;
3133   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3134   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3135 
3136   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3137   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3138   for (i=0; i<Bn; i++) {
3139     if (PetscRealPart(xarray[i]) > -1.0) {
3140       idx[count]     = i;                   /* local column index in off-diagonal part B */
3141       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3142       count++;
3143     }
3144   }
3145   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3146   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3147 
3148   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3149   /* cannot ensure iscol_o has same blocksize as iscol! */
3150 
3151   ierr = PetscFree(idx);CHKERRQ(ierr);
3152   *garray = cmap1;
3153 
3154   ierr = VecDestroy(&x);CHKERRQ(ierr);
3155   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3156   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3161 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3162 {
3163   PetscErrorCode ierr;
3164   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3165   Mat            M = NULL;
3166   MPI_Comm       comm;
3167   IS             iscol_d,isrow_d,iscol_o;
3168   Mat            Asub = NULL,Bsub = NULL;
3169   PetscInt       n;
3170 
3171   PetscFunctionBegin;
3172   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3173 
3174   if (call == MAT_REUSE_MATRIX) {
3175     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3176     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3177     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3178 
3179     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3180     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3181 
3182     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3183     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3184 
3185     /* Update diagonal and off-diagonal portions of submat */
3186     asub = (Mat_MPIAIJ*)(*submat)->data;
3187     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3188     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3189     if (n) {
3190       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3191     }
3192     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3193     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3194 
3195   } else { /* call == MAT_INITIAL_MATRIX */
3196     const PetscInt *garray;
3197     PetscInt        BsubN;
3198 
3199     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3200     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3201 
3202     /* Create local submatrices Asub and Bsub */
3203     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3204     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3205 
3206     /* Create submatrix M */
3207     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3208 
3209     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3210     asub = (Mat_MPIAIJ*)M->data;
3211 
3212     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3213     n = asub->B->cmap->N;
3214     if (BsubN > n) {
3215       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3216       const PetscInt *idx;
3217       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3218       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3219 
3220       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3221       j = 0;
3222       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3223       for (i=0; i<n; i++) {
3224         if (j >= BsubN) break;
3225         while (subgarray[i] > garray[j]) j++;
3226 
3227         if (subgarray[i] == garray[j]) {
3228           idx_new[i] = idx[j++];
3229         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3230       }
3231       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3232 
3233       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3234       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3235 
3236     } else if (BsubN < n) {
3237       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3238     }
3239 
3240     ierr = PetscFree(garray);CHKERRQ(ierr);
3241     *submat = M;
3242 
3243     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3244     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3245     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3246 
3247     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3248     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3249 
3250     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3251     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3252   }
3253   PetscFunctionReturn(0);
3254 }
3255 
3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3257 {
3258   PetscErrorCode ierr;
3259   IS             iscol_local=NULL,isrow_d;
3260   PetscInt       csize;
3261   PetscInt       n,i,j,start,end;
3262   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3263   MPI_Comm       comm;
3264 
3265   PetscFunctionBegin;
3266   /* If isrow has same processor distribution as mat,
3267      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3268   if (call == MAT_REUSE_MATRIX) {
3269     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3270     if (isrow_d) {
3271       sameRowDist  = PETSC_TRUE;
3272       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3273     } else {
3274       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3275       if (iscol_local) {
3276         sameRowDist  = PETSC_TRUE;
3277         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3278       }
3279     }
3280   } else {
3281     /* Check if isrow has same processor distribution as mat */
3282     sameDist[0] = PETSC_FALSE;
3283     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3284     if (!n) {
3285       sameDist[0] = PETSC_TRUE;
3286     } else {
3287       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3288       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3289       if (i >= start && j < end) {
3290         sameDist[0] = PETSC_TRUE;
3291       }
3292     }
3293 
3294     /* Check if iscol has same processor distribution as mat */
3295     sameDist[1] = PETSC_FALSE;
3296     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3297     if (!n) {
3298       sameDist[1] = PETSC_TRUE;
3299     } else {
3300       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3301       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3302       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3303     }
3304 
3305     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3306     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3307     sameRowDist = tsameDist[0];
3308   }
3309 
3310   if (sameRowDist) {
3311     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3312       /* isrow and iscol have same processor distribution as mat */
3313       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3314       PetscFunctionReturn(0);
3315     } else { /* sameRowDist */
3316       /* isrow has same processor distribution as mat */
3317       if (call == MAT_INITIAL_MATRIX) {
3318         PetscBool sorted;
3319         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3320         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3321         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3322         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3323 
3324         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3325         if (sorted) {
3326           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3327           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3328           PetscFunctionReturn(0);
3329         }
3330       } else { /* call == MAT_REUSE_MATRIX */
3331         IS    iscol_sub;
3332         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3333         if (iscol_sub) {
3334           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3335           PetscFunctionReturn(0);
3336         }
3337       }
3338     }
3339   }
3340 
3341   /* General case: iscol -> iscol_local which has global size of iscol */
3342   if (call == MAT_REUSE_MATRIX) {
3343     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3344     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3345   } else {
3346     if (!iscol_local) {
3347       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3348     }
3349   }
3350 
3351   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3352   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3353 
3354   if (call == MAT_INITIAL_MATRIX) {
3355     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3356     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3357   }
3358   PetscFunctionReturn(0);
3359 }
3360 
3361 /*@C
3362      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3363          and "off-diagonal" part of the matrix in CSR format.
3364 
3365    Collective on MPI_Comm
3366 
3367    Input Parameters:
3368 +  comm - MPI communicator
3369 .  A - "diagonal" portion of matrix
3370 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3371 -  garray - global index of B columns
3372 
3373    Output Parameter:
3374 .   mat - the matrix, with input A as its local diagonal matrix
3375    Level: advanced
3376 
3377    Notes:
3378        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3379        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3380 
3381 .seealso: MatCreateMPIAIJWithSplitArrays()
3382 @*/
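/*
   A minimal usage sketch (a hedged illustration; Aloc and Bloc are assumed to be
   assembled MATSEQAIJ matrices with matching local row counts, garray[] lists the
   global column of each column of Bloc, and error handling is abbreviated):

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);

   After this call Aloc is owned by C and Bloc has been destroyed, so neither may be
   used again by the caller, as documented above.
*/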
3383 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3384 {
3385   PetscErrorCode ierr;
3386   Mat_MPIAIJ     *maij;
3387   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3388   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3389   PetscScalar    *oa=b->a;
3390   Mat            Bnew;
3391   PetscInt       m,n,N;
3392 
3393   PetscFunctionBegin;
3394   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3395   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3396   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3397   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3398   /* The check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3399   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3400 
3401   /* Get global columns of mat */
3402   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3403 
3404   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3405   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3406   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3407   maij = (Mat_MPIAIJ*)(*mat)->data;
3408 
3409   (*mat)->preallocated = PETSC_TRUE;
3410 
3411   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3412   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3413 
3414   /* Set A as diagonal portion of *mat */
3415   maij->A = A;
3416 
3417   nz = oi[m];
3418   for (i=0; i<nz; i++) {
3419     col   = oj[i];
3420     oj[i] = garray[col];
3421   }
3422 
3423    /* Set Bnew as off-diagonal portion of *mat */
3424   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3425   bnew        = (Mat_SeqAIJ*)Bnew->data;
3426   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3427   maij->B     = Bnew;
3428 
3429   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3430 
3431   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3432   b->free_a       = PETSC_FALSE;
3433   b->free_ij      = PETSC_FALSE;
3434   ierr = MatDestroy(&B);CHKERRQ(ierr);
3435 
3436   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3437   bnew->free_a       = PETSC_TRUE;
3438   bnew->free_ij      = PETSC_TRUE;
3439 
3440   /* condense columns of maij->B */
3441   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3442   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3443   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3444   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3445   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3446   PetscFunctionReturn(0);
3447 }
3448 
3449 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3450 
3451 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3452 {
3453   PetscErrorCode ierr;
3454   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3455   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3456   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3457   Mat            M,Msub,B=a->B;
3458   MatScalar      *aa;
3459   Mat_SeqAIJ     *aij;
3460   PetscInt       *garray = a->garray,*colsub,Ncols;
3461   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3462   IS             iscol_sub,iscmap;
3463   const PetscInt *is_idx,*cmap;
3464   PetscBool      allcolumns=PETSC_FALSE;
3465   MPI_Comm       comm;
3466 
3467   PetscFunctionBegin;
3468   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3469 
3470   if (call == MAT_REUSE_MATRIX) {
3471     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3472     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3473     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3474 
3475     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3476     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3477 
3478     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3479     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3480 
3481     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3482 
3483   } else { /* call == MAT_INITIAL_MATRIX */
3484     PetscBool flg;
3485 
3486     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3487     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3488 
3489     /* (1) iscol -> nonscalable iscol_local */
3490     /* Check for special case: each processor gets entire matrix columns */
3491     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3492     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3493     if (allcolumns) {
3494       iscol_sub = iscol_local;
3495       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3496       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3497 
3498     } else {
3499       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3500       PetscInt *idx,*cmap1,k;
3501       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3502       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3503       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3504       count = 0;
3505       k     = 0;
3506       for (i=0; i<Ncols; i++) {
3507         j = is_idx[i];
3508         if (j >= cstart && j < cend) {
3509           /* diagonal part of mat */
3510           idx[count]     = j;
3511           cmap1[count++] = i; /* column index in submat */
3512         } else if (Bn) {
3513           /* off-diagonal part of mat */
3514           if (j == garray[k]) {
3515             idx[count]     = j;
3516             cmap1[count++] = i;  /* column index in submat */
3517           } else if (j > garray[k]) {
3518             while (j > garray[k] && k < Bn-1) k++;
3519             if (j == garray[k]) {
3520               idx[count]     = j;
3521               cmap1[count++] = i; /* column index in submat */
3522             }
3523           }
3524         }
3525       }
3526       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3527 
3528       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3529       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3530       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3531 
3532       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3533     }
3534 
3535     /* (3) Create sequential Msub */
3536     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3537   }
3538 
3539   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3540   aij  = (Mat_SeqAIJ*)(Msub)->data;
3541   ii   = aij->i;
3542   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3543 
3544   /*
3545       m - number of local rows
3546       Ncols - number of columns (same on all processors)
3547       rstart - first row in new global matrix generated
3548   */
3549   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3550 
3551   if (call == MAT_INITIAL_MATRIX) {
3552     /* (4) Create parallel newmat */
3553     PetscMPIInt    rank,size;
3554     PetscInt       csize;
3555 
3556     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3557     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3558 
3559     /*
3560         Determine the number of non-zeros in the diagonal and off-diagonal
3561         portions of the matrix in order to do correct preallocation
3562     */
3563 
3564     /* first get start and end of "diagonal" columns */
3565     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3566     if (csize == PETSC_DECIDE) {
3567       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3568       if (mglobal == Ncols) { /* square matrix */
3569         nlocal = m;
3570       } else {
3571         nlocal = Ncols/size + ((Ncols % size) > rank);
3572       }
3573     } else {
3574       nlocal = csize;
3575     }
3576     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3577     rstart = rend - nlocal;
3578     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3579 
3580     /* next, compute all the lengths */
3581     jj    = aij->j;
3582     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3583     olens = dlens + m;
3584     for (i=0; i<m; i++) {
3585       jend = ii[i+1] - ii[i];
3586       olen = 0;
3587       dlen = 0;
3588       for (j=0; j<jend; j++) {
3589         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3590         else dlen++;
3591         jj++;
3592       }
3593       olens[i] = olen;
3594       dlens[i] = dlen;
3595     }
3596 
3597     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3598     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3599 
3600     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3601     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3602     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3603     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3604     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3605     ierr = PetscFree(dlens);CHKERRQ(ierr);
3606 
3607   } else { /* call == MAT_REUSE_MATRIX */
3608     M    = *newmat;
3609     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3610     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3611     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3612     /*
3613          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3614        rather than the slower MatSetValues().
3615     */
3616     M->was_assembled = PETSC_TRUE;
3617     M->assembled     = PETSC_FALSE;
3618   }
3619 
3620   /* (5) Set values of Msub to *newmat */
3621   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3622   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3623 
3624   jj   = aij->j;
3625   aa   = aij->a;
3626   for (i=0; i<m; i++) {
3627     row = rstart + i;
3628     nz  = ii[i+1] - ii[i];
3629     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3630     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3631     jj += nz; aa += nz;
3632   }
3633   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3634 
3635   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637 
3638   ierr = PetscFree(colsub);CHKERRQ(ierr);
3639 
3640   /* save Msub, iscol_sub and iscmap used in processor for next request */
3641   if (call ==  MAT_INITIAL_MATRIX) {
3642     *newmat = M;
3643     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3644     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3645 
3646     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3647     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3648 
3649     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3650     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3651 
3652     if (iscol_local) {
3653       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3654       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3655     }
3656   }
3657   PetscFunctionReturn(0);
3658 }
3659 
3660 /*
3661     Not great since it makes two copies of the submatrix: first a SeqAIJ locally, and
3662   then the end result by concatenating the local matrices.
3663   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3664 
3665   Note: This requires a sequential iscol with all indices.
3666 */
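/*
   For example, a caller could form such a sequential iscol by gathering the distributed
   column IS onto every process (a one-shot sketch, assuming iscol_par is the parallel
   column IS and the submatrix is being requested for the first time):

     IS iscol_seq;
     ierr = ISAllGather(iscol_par,&iscol_seq);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol_seq);CHKERRQ(ierr);

   This mirrors what MatCreateSubMatrix_MPIAIJ() does via ISGetSeqIS_Private() above.
*/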
3667 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3668 {
3669   PetscErrorCode ierr;
3670   PetscMPIInt    rank,size;
3671   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3672   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3673   Mat            M,Mreuse;
3674   MatScalar      *aa,*vwork;
3675   MPI_Comm       comm;
3676   Mat_SeqAIJ     *aij;
3677   PetscBool      colflag,allcolumns=PETSC_FALSE;
3678 
3679   PetscFunctionBegin;
3680   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3681   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3682   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3683 
3684   /* Check for special case: each processor gets entire matrix columns */
3685   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3686   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3687   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3688 
3689   if (call ==  MAT_REUSE_MATRIX) {
3690     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3691     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3692     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3693   } else {
3694     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3695   }
3696 
3697   /*
3698       m - number of local rows
3699       n - number of columns (same on all processors)
3700       rstart - first row in new global matrix generated
3701   */
3702   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3703   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3704   if (call == MAT_INITIAL_MATRIX) {
3705     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3706     ii  = aij->i;
3707     jj  = aij->j;
3708 
3709     /*
3710         Determine the number of non-zeros in the diagonal and off-diagonal
3711         portions of the matrix in order to do correct preallocation
3712     */
3713 
3714     /* first get start and end of "diagonal" columns */
3715     if (csize == PETSC_DECIDE) {
3716       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3717       if (mglobal == n) { /* square matrix */
3718         nlocal = m;
3719       } else {
3720         nlocal = n/size + ((n % size) > rank);
3721       }
3722     } else {
3723       nlocal = csize;
3724     }
3725     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3726     rstart = rend - nlocal;
3727     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3728 
3729     /* next, compute all the lengths */
3730     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3731     olens = dlens + m;
3732     for (i=0; i<m; i++) {
3733       jend = ii[i+1] - ii[i];
3734       olen = 0;
3735       dlen = 0;
3736       for (j=0; j<jend; j++) {
3737         if (*jj < rstart || *jj >= rend) olen++;
3738         else dlen++;
3739         jj++;
3740       }
3741       olens[i] = olen;
3742       dlens[i] = dlen;
3743     }
3744     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3745     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3746     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3747     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3748     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3749     ierr = PetscFree(dlens);CHKERRQ(ierr);
3750   } else {
3751     PetscInt ml,nl;
3752 
3753     M    = *newmat;
3754     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3755     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3756     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3757     /*
3758          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3759        rather than the slower MatSetValues().
3760     */
3761     M->was_assembled = PETSC_TRUE;
3762     M->assembled     = PETSC_FALSE;
3763   }
3764   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3765   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3766   ii   = aij->i;
3767   jj   = aij->j;
3768   aa   = aij->a;
3769   for (i=0; i<m; i++) {
3770     row   = rstart + i;
3771     nz    = ii[i+1] - ii[i];
3772     cwork = jj;     jj += nz;
3773     vwork = aa;     aa += nz;
3774     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3775   }
3776 
3777   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3778   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3779   *newmat = M;
3780 
3781   /* save submatrix used in processor for next request */
3782   if (call ==  MAT_INITIAL_MATRIX) {
3783     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3784     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3785   }
3786   PetscFunctionReturn(0);
3787 }
3788 
3789 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3790 {
3791   PetscInt       m,cstart, cend,j,nnz,i,d;
3792   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3793   const PetscInt *JJ;
3794   PetscScalar    *values;
3795   PetscErrorCode ierr;
3796   PetscBool      nooffprocentries;
3797 
3798   PetscFunctionBegin;
3799   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3800 
3801   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3802   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3803   m      = B->rmap->n;
3804   cstart = B->cmap->rstart;
3805   cend   = B->cmap->rend;
3806   rstart = B->rmap->rstart;
3807 
3808   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3809 
3810 #if defined(PETSC_USE_DEBUG)
3811   for (i=0; i<m; i++) {
3812     nnz = Ii[i+1]- Ii[i];
3813     JJ  = J + Ii[i];
3814     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3815     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3816     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3817   }
3818 #endif
3819 
3820   for (i=0; i<m; i++) {
3821     nnz     = Ii[i+1]- Ii[i];
3822     JJ      = J + Ii[i];
3823     nnz_max = PetscMax(nnz_max,nnz);
3824     d       = 0;
3825     for (j=0; j<nnz; j++) {
3826       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3827     }
3828     d_nnz[i] = d;
3829     o_nnz[i] = nnz - d;
3830   }
3831   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3832   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3833 
3834   if (v) values = (PetscScalar*)v;
3835   else {
3836     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3837   }
3838 
3839   for (i=0; i<m; i++) {
3840     ii   = i + rstart;
3841     nnz  = Ii[i+1]- Ii[i];
3842     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3843   }
3844   nooffprocentries    = B->nooffprocentries;
3845   B->nooffprocentries = PETSC_TRUE;
3846   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3847   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3848   B->nooffprocentries = nooffprocentries;
3849 
3850   if (!v) {
3851     ierr = PetscFree(values);CHKERRQ(ierr);
3852   }
3853   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 /*@
3858    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3859    (the default parallel PETSc format).
3860 
3861    Collective on MPI_Comm
3862 
3863    Input Parameters:
3864 +  B - the matrix
3865 .  i - the indices into j for the start of each local row (starts with zero)
3866 .  j - the column indices for each local row (starts with zero)
3867 -  v - optional values in the matrix
3868 
3869    Level: developer
3870 
3871    Notes:
3872        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3873      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3874      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3875 
3876        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3877 
3878        The format used for the sparse matrix input is equivalent to a
3879     row-major ordering, i.e. for the following matrix, the input data expected is
3880     as shown:
3881 
3882 $        1 0 0
3883 $        2 0 3     P0
3884 $       -------
3885 $        4 5 6     P1
3886 $
3887 $     Process0 [P0]: rows_owned=[0,1]
3888 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3889 $        j =  {0,0,2}  [size = 3]
3890 $        v =  {1,2,3}  [size = 3]
3891 $
3892 $     Process1 [P1]: rows_owned=[2]
3893 $        i =  {0,3}    [size = nrow+1  = 1+1]
3894 $        j =  {0,1,2}  [size = 3]
3895 $        v =  {4,5,6}  [size = 3]
3896 
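   A minimal calling sketch (not taken from a PETSc example; the local sizes and the
   CSR arrays i, j, v are assumed to be already set up on each process) might look like:

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);   /* copies i, j, v and assembles B */
.ve
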
3897 .keywords: matrix, aij, compressed row, sparse, parallel
3898 
3899 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3900           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3901 @*/
3902 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3903 {
3904   PetscErrorCode ierr;
3905 
3906   PetscFunctionBegin;
3907   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3908   PetscFunctionReturn(0);
3909 }
3910 
3911 /*@C
3912    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3913    (the default parallel PETSc format).  For good matrix assembly performance
3914    the user should preallocate the matrix storage by setting the parameters
3915    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3916    performance can be increased by more than a factor of 50.
3917 
3918    Collective on MPI_Comm
3919 
3920    Input Parameters:
3921 +  B - the matrix
3922 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3923            (same value is used for all local rows)
3924 .  d_nnz - array containing the number of nonzeros in the various rows of the
3925            DIAGONAL portion of the local submatrix (possibly different for each row)
3926            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3927            The size of this array is equal to the number of local rows, i.e. 'm'.
3928            For matrices that will be factored, you must leave room for (and set)
3929            the diagonal entry even if it is zero.
3930 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3931            submatrix (same value is used for all local rows).
3932 -  o_nnz - array containing the number of nonzeros in the various rows of the
3933            OFF-DIAGONAL portion of the local submatrix (possibly different for
3934            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3935            structure. The size of this array is equal to the number
3936            of local rows, i.e. 'm'.
3937 
3938    If the *_nnz parameter is given then the *_nz parameter is ignored
3939 
3940    The AIJ format (also called the Yale sparse matrix format or
3941    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3942    storage.  The stored row and column indices begin with zero.
3943    See Users-Manual: ch_mat for details.
3944 
3945    The parallel matrix is partitioned such that the first m0 rows belong to
3946    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3947    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3948 
3949    The DIAGONAL portion of the local submatrix of a processor can be defined
3950    as the submatrix which is obtained by extracting the part corresponding to
3951    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3952    first row that belongs to the processor, r2 is the last row belonging to
3953    this processor, and c1-c2 is the range of indices of the local part of a
3954    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3955    common case of a square matrix, the row and column ranges are the same and
3956    the DIAGONAL part is also square. The remaining portion of the local
3957    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3958 
3959    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3960 
3961    You can call MatGetInfo() to get information on how effective the preallocation was;
3962    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3963    You can also run with the option -info and look for messages with the string
3964    malloc in them to see if additional memory allocation was needed.
3965 
3966    Example usage:
3967 
3968    Consider the following 8x8 matrix with 34 nonzero values that is
3969    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3970    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3971    as follows:
3972 
3973 .vb
3974             1  2  0  |  0  3  0  |  0  4
3975     Proc0   0  5  6  |  7  0  0  |  8  0
3976             9  0 10  | 11  0  0  | 12  0
3977     -------------------------------------
3978            13  0 14  | 15 16 17  |  0  0
3979     Proc1   0 18  0  | 19 20 21  |  0  0
3980             0  0  0  | 22 23  0  | 24  0
3981     -------------------------------------
3982     Proc2  25 26 27  |  0  0 28  | 29  0
3983            30  0  0  | 31 32 33  |  0 34
3984 .ve
3985 
3986    This can be represented as a collection of submatrices as:
3987 
3988 .vb
3989       A B C
3990       D E F
3991       G H I
3992 .ve
3993 
3994    Where the submatrices A,B,C are owned by proc0, D,E,F are
3995    owned by proc1, G,H,I are owned by proc2.
3996 
3997    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3998    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3999    The 'M','N' parameters are 8,8, and have the same values on all procs.
4000 
4001    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4002    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4003    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4004    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4005    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4006    matrix, and [DF] as another SeqAIJ matrix.
4007 
4008    When d_nz, o_nz parameters are specified, d_nz storage elements are
4009    allocated for every row of the local diagonal submatrix, and o_nz
4010    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4011    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4012    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4013    In this case, the values of d_nz,o_nz are:
4014 .vb
4015      proc0 : d_nz = 2, o_nz = 2
4016      proc1 : d_nz = 3, o_nz = 2
4017      proc2 : d_nz = 1, o_nz = 4
4018 .ve
4019    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4020    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4021    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4022    34 values.
4023 
4024    When d_nnz, o_nnz parameters are specified, the storage is specified
4025    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4026    In the above case the values for d_nnz,o_nnz are:
4027 .vb
4028      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4029      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4030      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4031 .ve
4032    Here the space allocated is the sum of all the above values, i.e. 34, and
4033    hence pre-allocation is perfect.
4034 
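   As an illustrative sketch (not a complete program), proc1 of the example above could
   preallocate its three rows with exactly these counts; the variable names are only
   placeholders:

.vb
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     Mat      A;
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
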
4035    Level: intermediate
4036 
4037 .keywords: matrix, aij, compressed row, sparse, parallel
4038 
4039 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4040           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4041 @*/
4042 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4043 {
4044   PetscErrorCode ierr;
4045 
4046   PetscFunctionBegin;
4047   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4048   PetscValidType(B,1);
4049   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4050   PetscFunctionReturn(0);
4051 }
4052 
4053 /*@
4054      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4055          CSR format.
4056 
4057    Collective on MPI_Comm
4058 
4059    Input Parameters:
4060 +  comm - MPI communicator
4061 .  m - number of local rows (Cannot be PETSC_DECIDE)
4062 .  n - This value should be the same as the local size used in creating the
4063        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
4064        calculated if N is given) For square matrices n is almost always m.
4065 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4066 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4067 .   i - row indices
4068 .   j - column indices
4069 -   a - matrix values
4070 
4071    Output Parameter:
4072 .   mat - the matrix
4073 
4074    Level: intermediate
4075 
4076    Notes:
4077        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4078      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4079      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4080 
4081        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4082 
4083        The format used for the sparse matrix input is equivalent to a
4084     row-major ordering, i.e. for the following matrix, the input data expected is
4085     as shown:
4086 
4087 $        1 0 0
4088 $        2 0 3     P0
4089 $       -------
4090 $        4 5 6     P1
4091 $
4092 $     Process0 [P0]: rows_owned=[0,1]
4093 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4094 $        j =  {0,0,2}  [size = 3]
4095 $        v =  {1,2,3}  [size = 3]
4096 $
4097 $     Process1 [P1]: rows_owned=[2]
4098 $        i =  {0,3}    [size = nrow+1  = 1+1]
4099 $        j =  {0,1,2}  [size = 3]
4100 $        v =  {4,5,6}  [size = 3]
4101 
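   For the 3x3 example above, process 0 could make the (illustrative) call below;
   process 1 makes the corresponding call with its own arrays:

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
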
4102 .keywords: matrix, aij, compressed row, sparse, parallel
4103 
4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4105           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4106 @*/
4107 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4108 {
4109   PetscErrorCode ierr;
4110 
4111   PetscFunctionBegin;
4112   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4113   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4114   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4115   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4116   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4117   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4118   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4119   PetscFunctionReturn(0);
4120 }
4121 
4122 /*@C
4123    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4124    (the default parallel PETSc format).  For good matrix assembly performance
4125    the user should preallocate the matrix storage by setting the parameters
4126    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4127    performance can be increased by more than a factor of 50.
4128 
4129    Collective on MPI_Comm
4130 
4131    Input Parameters:
4132 +  comm - MPI communicator
4133 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4134            This value should be the same as the local size used in creating the
4135            y vector for the matrix-vector product y = Ax.
4136 .  n - This value should be the same as the local size used in creating the
4137        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
4138        calculated if N is given) For square matrices n is almost always m.
4139 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4140 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4141 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4142            (same value is used for all local rows)
4143 .  d_nnz - array containing the number of nonzeros in the various rows of the
4144            DIAGONAL portion of the local submatrix (possibly different for each row)
4145            or NULL, if d_nz is used to specify the nonzero structure.
4146            The size of this array is equal to the number of local rows, i.e. 'm'.
4147 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4148            submatrix (same value is used for all local rows).
4149 -  o_nnz - array containing the number of nonzeros in the various rows of the
4150            OFF-DIAGONAL portion of the local submatrix (possibly different for
4151            each row) or NULL, if o_nz is used to specify the nonzero
4152            structure. The size of this array is equal to the number
4153            of local rows, i.e. 'm'.
4154 
4155    Output Parameter:
4156 .  A - the matrix
4157 
4158    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4159    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4160    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4161 
4162    Notes:
4163    If the *_nnz parameter is given then the *_nz parameter is ignored
4164 
4165    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4166    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4167    storage requirements for this matrix.
4168 
4169    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4170    processor then it must be used on all processors that share the object for
4171    that argument.
4172 
4173    The user MUST specify either the local or global matrix dimensions
4174    (possibly both).
4175 
4176    The parallel matrix is partitioned across processors such that the
4177    first m0 rows belong to process 0, the next m1 rows belong to
4178    process 1, the next m2 rows belong to process 2, etc., where
4179    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4180    values corresponding to an [m x N] submatrix.
4181 
4182    The columns are logically partitioned with the n0 columns belonging
4183    to 0th partition, the next n1 columns belonging to the next
4184    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4185 
4186    The DIAGONAL portion of the local submatrix on any given processor
4187    is the submatrix formed by the rows and columns m,n
4188    owned by the given processor, i.e. the diagonal matrix on
4189    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4190    etc. The remaining portion of the local submatrix [m x (N-n)]
4191    constitutes the OFF-DIAGONAL portion. The example below better
4192    illustrates this concept.
4193 
4194    For a square global matrix we define each processor's diagonal portion
4195    to be its local rows and the corresponding columns (a square submatrix);
4196    each processor's off-diagonal portion encompasses the remainder of the
4197    local matrix (a rectangular submatrix).
4198 
4199    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4200 
4201    When calling this routine with a single process communicator, a matrix of
4202    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4203    type of communicator, use the construction mechanism
4204 .vb
4205      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4206 .ve
4207 
4213    By default, this format uses inodes (identical nodes) when possible.
4214    We search for consecutive rows with the same nonzero structure, thereby
4215    reusing matrix information to achieve increased efficiency.
4216 
4217    Options Database Keys:
4218 +  -mat_no_inode  - Do not use inodes
4219 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4220 
4223    Example usage:
4224 
4225    Consider the following 8x8 matrix with 34 nonzero values that is
4226    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4227    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4228    as follows
4229 
4230 .vb
4231             1  2  0  |  0  3  0  |  0  4
4232     Proc0   0  5  6  |  7  0  0  |  8  0
4233             9  0 10  | 11  0  0  | 12  0
4234     -------------------------------------
4235            13  0 14  | 15 16 17  |  0  0
4236     Proc1   0 18  0  | 19 20 21  |  0  0
4237             0  0  0  | 22 23  0  | 24  0
4238     -------------------------------------
4239     Proc2  25 26 27  |  0  0 28  | 29  0
4240            30  0  0  | 31 32 33  |  0 34
4241 .ve
4242 
4243    This can be represented as a collection of submatrices as
4244 
4245 .vb
4246       A B C
4247       D E F
4248       G H I
4249 .ve
4250 
4251    Where the submatrices A,B,C are owned by proc0, D,E,F are
4252    owned by proc1, G,H,I are owned by proc2.
4253 
4254    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4255    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4256    The 'M','N' parameters are 8,8, and have the same values on all procs.
4257 
4258    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4259    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4260    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4261    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4262    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4263    matrix, and [DF] as another SeqAIJ matrix.
4264 
4265    When d_nz, o_nz parameters are specified, d_nz storage elements are
4266    allocated for every row of the local diagonal submatrix, and o_nz
4267    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4268    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4269    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4270    In this case, the values of d_nz,o_nz are
4271 .vb
4272      proc0 : d_nz = 2, o_nz = 2
4273      proc1 : d_nz = 3, o_nz = 2
4274      proc2 : d_nz = 1, o_nz = 4
4275 .ve
4276    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4277    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4278    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4279    34 values.
4280 
4281    When d_nnz, o_nnz parameters are specified, the storage is specified
4282    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4283    In the above case the values for d_nnz,o_nnz are
4284 .vb
4285      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4286      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4287      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4288 .ve
4289    Here the space allocated is the sum of all the above values, i.e. 34, and
4290    hence pre-allocation is perfect.
4291 
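   As an illustrative sketch, proc0 of the example above could create its share of the
   8x8 matrix with the exact per-row counts as follows; the other processes make the
   analogous calls with their own counts:

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* fill with MatSetValues() and then MatAssemblyBegin()/MatAssemblyEnd() */
.ve
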
4292    Level: intermediate
4293 
4294 .keywords: matrix, aij, compressed row, sparse, parallel
4295 
4296 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4297           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4298 @*/
4299 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4300 {
4301   PetscErrorCode ierr;
4302   PetscMPIInt    size;
4303 
4304   PetscFunctionBegin;
4305   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4306   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4307   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4308   if (size > 1) {
4309     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4310     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4311   } else {
4312     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4313     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4314   }
4315   PetscFunctionReturn(0);
4316 }
4317 
4318 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4319 {
4320   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4321   PetscBool      flg;
4322   PetscErrorCode ierr;
4323 
4324   PetscFunctionBegin;
4325   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4326   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4327   if (Ad)     *Ad     = a->A;
4328   if (Ao)     *Ao     = a->B;
4329   if (colmap) *colmap = a->garray;
4330   PetscFunctionReturn(0);
4331 }
4332 
4333 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4334 {
4335   PetscErrorCode ierr;
4336   PetscInt       m,N,i,rstart,nnz,Ii;
4337   PetscInt       *indx;
4338   PetscScalar    *values;
4339 
4340   PetscFunctionBegin;
4341   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4342   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4343     PetscInt       *dnz,*onz,sum,bs,cbs;
4344 
4345     if (n == PETSC_DECIDE) {
4346       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4347     }
4348     /* Check sum(n) = N */
4349     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4350     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4351 
4352     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4353     rstart -= m;
4354 
4355     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4356     for (i=0; i<m; i++) {
4357       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4358       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4359       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4360     }
4361 
4362     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4363     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4364     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4365     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4366     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4367     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4368     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4369     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4370   }
4371 
4372   /* numeric phase */
4373   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4374   for (i=0; i<m; i++) {
4375     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4376     Ii   = i + rstart;
4377     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4378     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4379   }
4380   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4381   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4382   PetscFunctionReturn(0);
4383 }
4384 
4385 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4386 {
4387   PetscErrorCode    ierr;
4388   PetscMPIInt       rank;
4389   PetscInt          m,N,i,rstart,nnz;
4390   size_t            len;
4391   const PetscInt    *indx;
4392   PetscViewer       out;
4393   char              *name;
4394   Mat               B;
4395   const PetscScalar *values;
4396 
4397   PetscFunctionBegin;
4398   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4399   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4400   /* Should this be the type of the diagonal block of A? */
4401   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4402   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4403   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4404   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4405   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4406   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4407   for (i=0; i<m; i++) {
4408     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4409     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4410     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4411   }
4412   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4413   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4414 
4415   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4416   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4417   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4418   sprintf(name,"%s.%d",outfile,rank);
4419   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4420   ierr = PetscFree(name);CHKERRQ(ierr);
4421   ierr = MatView(B,out);CHKERRQ(ierr);
4422   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4423   ierr = MatDestroy(&B);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4428 {
4429   PetscErrorCode      ierr;
4430   Mat_Merge_SeqsToMPI *merge;
4431   PetscContainer      container;
4432 
4433   PetscFunctionBegin;
4434   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4435   if (container) {
4436     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4445     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4446     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4447     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4448     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4449     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4450     ierr = PetscFree(merge);CHKERRQ(ierr);
4451     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4452   }
4453   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4454   PetscFunctionReturn(0);
4455 }
4456 
4457 #include <../src/mat/utils/freespace.h>
4458 #include <petscbt.h>
4459 
4460 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4461 {
4462   PetscErrorCode      ierr;
4463   MPI_Comm            comm;
4464   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4465   PetscMPIInt         size,rank,taga,*len_s;
4466   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4467   PetscInt            proc,m;
4468   PetscInt            **buf_ri,**buf_rj;
4469   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4470   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4471   MPI_Request         *s_waits,*r_waits;
4472   MPI_Status          *status;
4473   MatScalar           *aa=a->a;
4474   MatScalar           **abuf_r,*ba_i;
4475   Mat_Merge_SeqsToMPI *merge;
4476   PetscContainer      container;
4477 
4478   PetscFunctionBegin;
4479   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4480   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4481 
4482   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4483   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4484 
4485   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4486   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4487 
4488   bi     = merge->bi;
4489   bj     = merge->bj;
4490   buf_ri = merge->buf_ri;
4491   buf_rj = merge->buf_rj;
4492 
4493   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4494   owners = merge->rowmap->range;
4495   len_s  = merge->len_s;
4496 
4497   /* send and recv matrix values */
4498   /*-----------------------------*/
4499   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4500   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4501 
4502   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4503   for (proc=0,k=0; proc<size; proc++) {
4504     if (!len_s[proc]) continue;
4505     i    = owners[proc];
4506     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4507     k++;
4508   }
4509 
4510   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4511   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4512   ierr = PetscFree(status);CHKERRQ(ierr);
4513 
4514   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4515   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4516 
4517   /* insert mat values of mpimat */
4518   /*----------------------------*/
4519   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4520   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4521 
4522   for (k=0; k<merge->nrecv; k++) {
4523     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4524     nrows       = *(buf_ri_k[k]);
4525     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4526     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4527   }
4528 
4529   /* set values of ba */
4530   m = merge->rowmap->n;
4531   for (i=0; i<m; i++) {
4532     arow = owners[rank] + i;
4533     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4534     bnzi = bi[i+1] - bi[i];
4535     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4536 
4537     /* add local non-zero vals of this proc's seqmat into ba */
4538     anzi   = ai[arow+1] - ai[arow];
4539     aj     = a->j + ai[arow];
4540     aa     = a->a + ai[arow];
4541     nextaj = 0;
4542     for (j=0; nextaj<anzi; j++) {
4543       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4544         ba_i[j] += aa[nextaj++];
4545       }
4546     }
4547 
4548     /* add received vals into ba */
4549     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4550       /* i-th row */
4551       if (i == *nextrow[k]) {
4552         anzi   = *(nextai[k]+1) - *nextai[k];
4553         aj     = buf_rj[k] + *(nextai[k]);
4554         aa     = abuf_r[k] + *(nextai[k]);
4555         nextaj = 0;
4556         for (j=0; nextaj<anzi; j++) {
4557           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4558             ba_i[j] += aa[nextaj++];
4559           }
4560         }
4561         nextrow[k]++; nextai[k]++;
4562       }
4563     }
4564     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4565   }
4566   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4567   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4568 
4569   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4570   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4571   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4572   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4573   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4574   PetscFunctionReturn(0);
4575 }
4576 
4577 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4578 {
4579   PetscErrorCode      ierr;
4580   Mat                 B_mpi;
4581   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4582   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4583   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4584   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4585   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4586   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4587   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4588   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4589   MPI_Status          *status;
4590   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4591   PetscBT             lnkbt;
4592   Mat_Merge_SeqsToMPI *merge;
4593   PetscContainer      container;
4594 
4595   PetscFunctionBegin;
4596   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4597 
4598   /* make sure it is a PETSc comm */
4599   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4601   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4602 
4603   ierr = PetscNew(&merge);CHKERRQ(ierr);
4604   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4605 
4606   /* determine row ownership */
4607   /*---------------------------------------------------------*/
4608   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4609   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4610   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4611   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4612   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4613   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4614   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4615 
4616   m      = merge->rowmap->n;
4617   owners = merge->rowmap->range;
4618 
4619   /* determine the number of messages to send, their lengths */
4620   /*---------------------------------------------------------*/
4621   len_s = merge->len_s;
4622 
4623   len          = 0; /* length of buf_si[] */
4624   merge->nsend = 0;
4625   for (proc=0; proc<size; proc++) {
4626     len_si[proc] = 0;
4627     if (proc == rank) {
4628       len_s[proc] = 0;
4629     } else {
4630       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4631       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4632     }
4633     if (len_s[proc]) {
4634       merge->nsend++;
4635       nrows = 0;
4636       for (i=owners[proc]; i<owners[proc+1]; i++) {
4637         if (ai[i+1] > ai[i]) nrows++;
4638       }
4639       len_si[proc] = 2*(nrows+1);
4640       len         += len_si[proc];
4641     }
4642   }
4643 
4644   /* determine the number and length of messages to receive for ij-structure */
4645   /*-------------------------------------------------------------------------*/
4646   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4647   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4648 
4649   /* post the Irecv of j-structure */
4650   /*-------------------------------*/
4651   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4652   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4653 
4654   /* post the Isend of j-structure */
4655   /*--------------------------------*/
4656   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4657 
4658   for (proc=0, k=0; proc<size; proc++) {
4659     if (!len_s[proc]) continue;
4660     i    = owners[proc];
4661     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4662     k++;
4663   }
4664 
4665   /* receives and sends of j-structure are complete */
4666   /*------------------------------------------------*/
4667   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4668   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4669 
4670   /* send and recv i-structure */
4671   /*---------------------------*/
4672   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4673   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4674 
4675   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4676   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4677   for (proc=0,k=0; proc<size; proc++) {
4678     if (!len_s[proc]) continue;
4679     /* form outgoing message for i-structure:
4680          buf_si[0]:                 nrows to be sent
4681                [1:nrows]:           row index (global)
4682                [nrows+1:2*nrows+1]: i-structure index
4683     */
4684     /*-------------------------------------------*/
4685     nrows       = len_si[proc]/2 - 1;
4686     buf_si_i    = buf_si + nrows+1;
4687     buf_si[0]   = nrows;
4688     buf_si_i[0] = 0;
4689     nrows       = 0;
4690     for (i=owners[proc]; i<owners[proc+1]; i++) {
4691       anzi = ai[i+1] - ai[i];
4692       if (anzi) {
4693         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4694         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4695         nrows++;
4696       }
4697     }
4698     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4699     k++;
4700     buf_si += len_si[proc];
4701   }
4702 
4703   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4704   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4705 
4706   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4707   for (i=0; i<merge->nrecv; i++) {
4708     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4709   }
4710 
4711   ierr = PetscFree(len_si);CHKERRQ(ierr);
4712   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4713   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4714   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4715   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4716   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4717   ierr = PetscFree(status);CHKERRQ(ierr);
4718 
4719   /* compute a local seq matrix in each processor */
4720   /*----------------------------------------------*/
4721   /* allocate bi array and free space for accumulating nonzero column info */
4722   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4723   bi[0] = 0;
4724 
4725   /* create and initialize a linked list */
4726   nlnk = N+1;
4727   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4728 
4729   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4730   len  = ai[owners[rank+1]] - ai[owners[rank]];
4731   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4732 
4733   current_space = free_space;
4734 
4735   /* determine symbolic info for each local row */
4736   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4737 
4738   for (k=0; k<merge->nrecv; k++) {
4739     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4740     nrows       = *buf_ri_k[k];
4741     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4742     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4743   }
4744 
4745   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4746   len  = 0;
4747   for (i=0; i<m; i++) {
4748     bnzi = 0;
4749     /* add local non-zero cols of this proc's seqmat into lnk */
4750     arow  = owners[rank] + i;
4751     anzi  = ai[arow+1] - ai[arow];
4752     aj    = a->j + ai[arow];
4753     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4754     bnzi += nlnk;
4755     /* add received col data into lnk */
4756     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4757       if (i == *nextrow[k]) { /* i-th row */
4758         anzi  = *(nextai[k]+1) - *nextai[k];
4759         aj    = buf_rj[k] + *nextai[k];
4760         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4761         bnzi += nlnk;
4762         nextrow[k]++; nextai[k]++;
4763       }
4764     }
4765     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4766 
4767     /* if free space is not available, make more free space */
4768     if (current_space->local_remaining<bnzi) {
4769       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4770       nspacedouble++;
4771     }
4772     /* copy data into free space, then initialize lnk */
4773     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4774     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4775 
4776     current_space->array           += bnzi;
4777     current_space->local_used      += bnzi;
4778     current_space->local_remaining -= bnzi;
4779 
4780     bi[i+1] = bi[i] + bnzi;
4781   }
4782 
4783   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4784 
4785   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4786   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4787   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4788 
4789   /* create symbolic parallel matrix B_mpi */
4790   /*---------------------------------------*/
4791   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4792   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4793   if (n==PETSC_DECIDE) {
4794     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4795   } else {
4796     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4797   }
4798   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4799   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4800   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4801   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4802   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4803 
4804   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4805   B_mpi->assembled    = PETSC_FALSE;
4806   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4807   merge->bi           = bi;
4808   merge->bj           = bj;
4809   merge->buf_ri       = buf_ri;
4810   merge->buf_rj       = buf_rj;
4811   merge->coi          = NULL;
4812   merge->coj          = NULL;
4813   merge->owners_co    = NULL;
4814 
4815   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4816 
4817   /* attach the supporting struct to B_mpi for reuse */
4818   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4819   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4820   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4821   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4822   *mpimat = B_mpi;
4823 
4824   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4825   PetscFunctionReturn(0);
4826 }
4827 
4828 /*@C
4829       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4830                  matrices from each processor
4831 
4832     Collective on MPI_Comm
4833 
4834    Input Parameters:
4835 +    comm - the communicator the parallel matrix will live on
4836 .    seqmat - the input sequential matrix
4837 .    m - number of local rows (or PETSC_DECIDE)
4838 .    n - number of local columns (or PETSC_DECIDE)
4839 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4840 
4841    Output Parameter:
4842 .    mpimat - the parallel matrix generated
4843 
4844     Level: advanced
4845 
4846    Notes:
4847      The dimensions of the sequential matrix in each processor MUST be the same.
4848      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4849      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
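
     A rough usage sketch (assuming every process has already assembled a sequential
     matrix seqmat of identical size):

.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* after changing the values (but not the nonzero pattern) of seqmat, redo the sum in place */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve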
4850 @*/
4851 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4852 {
4853   PetscErrorCode ierr;
4854   PetscMPIInt    size;
4855 
4856   PetscFunctionBegin;
4857   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4858   if (size == 1) {
4859     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4860     if (scall == MAT_INITIAL_MATRIX) {
4861       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4862     } else {
4863       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4864     }
4865     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4866     PetscFunctionReturn(0);
4867   }
4868   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4869   if (scall == MAT_INITIAL_MATRIX) {
4870     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4871   }
4872   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4873   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4874   PetscFunctionReturn(0);
4875 }
4876 
4877 /*@
4878      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4879           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4880           with MatGetSize().
4881 
4882     Not Collective
4883 
4884    Input Parameters:
4885 +    A - the matrix
4886 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4887 
4888    Output Parameter:
4889 .    A_loc - the local sequential matrix generated
4890 
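   Notes:
     A minimal usage sketch (illustrative only):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     MatDestroy(&A_loc);
.ve
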
4891     Level: developer
4892 
4893 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4894 
4895 @*/
4896 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4897 {
4898   PetscErrorCode ierr;
4899   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4900   Mat_SeqAIJ     *mat,*a,*b;
4901   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4902   MatScalar      *aa,*ba,*cam;
4903   PetscScalar    *ca;
4904   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4905   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4906   PetscBool      match;
4907   MPI_Comm       comm;
4908   PetscMPIInt    size;
4909 
4910   PetscFunctionBegin;
4911   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4912   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4913   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4914   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4915   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4916 
4917   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4918   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4919   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4920   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4921   aa = a->a; ba = b->a;
4922   if (scall == MAT_INITIAL_MATRIX) {
4923     if (size == 1) {
4924       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4925       PetscFunctionReturn(0);
4926     }
4927 
4928     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4929     ci[0] = 0;
4930     for (i=0; i<am; i++) {
4931       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4932     }
4933     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4934     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4935     k    = 0;
4936     for (i=0; i<am; i++) {
4937       ncols_o = bi[i+1] - bi[i];
4938       ncols_d = ai[i+1] - ai[i];
4939       /* off-diagonal portion of A */
4940       for (jo=0; jo<ncols_o; jo++) {
4941         col = cmap[*bj];
4942         if (col >= cstart) break;
4943         cj[k]   = col; bj++;
4944         ca[k++] = *ba++;
4945       }
4946       /* diagonal portion of A */
4947       for (j=0; j<ncols_d; j++) {
4948         cj[k]   = cstart + *aj++;
4949         ca[k++] = *aa++;
4950       }
4951       /* off-diagonal portion of A */
4952       for (j=jo; j<ncols_o; j++) {
4953         cj[k]   = cmap[*bj++];
4954         ca[k++] = *ba++;
4955       }
4956     }
4957     /* put together the new matrix */
4958     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4959     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4960     /* Since these are PETSc arrays, change flags to free them as necessary. */
4961     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4962     mat->free_a  = PETSC_TRUE;
4963     mat->free_ij = PETSC_TRUE;
4964     mat->nonew   = 0;
4965   } else if (scall == MAT_REUSE_MATRIX) {
4966     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4967     ci = mat->i; cj = mat->j; cam = mat->a;
4968     for (i=0; i<am; i++) {
4969       /* off-diagonal portion of A */
4970       ncols_o = bi[i+1] - bi[i];
4971       for (jo=0; jo<ncols_o; jo++) {
4972         col = cmap[*bj];
4973         if (col >= cstart) break;
4974         *cam++ = *ba++; bj++;
4975       }
4976       /* diagonal portion of A */
4977       ncols_d = ai[i+1] - ai[i];
4978       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4979       /* off-diagonal portion of A */
4980       for (j=jo; j<ncols_o; j++) {
4981         *cam++ = *ba++; bj++;
4982       }
4983     }
4984   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4985   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4986   PetscFunctionReturn(0);
4987 }
4988 
4989 /*@C
4990      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4991 
4992     Not Collective
4993 
4994    Input Parameters:
4995 +    A - the matrix
4996 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4997 -    row, col - index sets of rows and columns to extract (or NULL)
4998 
4999    Output Parameter:
5000 .    A_loc - the local sequential matrix generated
5001 
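   Notes:
     A minimal usage sketch (illustrative only), extracting all local rows and all
     nonzero columns by passing NULL for both index sets:

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     /* ... */
     MatDestroy(&A_loc);
.ve
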
5002     Level: developer
5003 
5004 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5005 
5006 @*/
5007 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5008 {
5009   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5010   PetscErrorCode ierr;
5011   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5012   IS             isrowa,iscola;
5013   Mat            *aloc;
5014   PetscBool      match;
5015 
5016   PetscFunctionBegin;
5017   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5018   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5019   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5020   if (!row) {
5021     start = A->rmap->rstart; end = A->rmap->rend;
5022     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5023   } else {
5024     isrowa = *row;
5025   }
5026   if (!col) {
5027     start = A->cmap->rstart;
5028     cmap  = a->garray;
5029     nzA   = a->A->cmap->n;
5030     nzB   = a->B->cmap->n;
5031     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5032     ncols = 0;
5033     for (i=0; i<nzB; i++) {
5034       if (cmap[i] < start) idx[ncols++] = cmap[i];
5035       else break;
5036     }
5037     imark = i;
5038     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5039     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5040     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5041   } else {
5042     iscola = *col;
5043   }
5044   if (scall != MAT_INITIAL_MATRIX) {
5045     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5046     aloc[0] = *A_loc;
5047   }
5048   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5049   *A_loc = aloc[0];
5050   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5051   if (!row) {
5052     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5053   }
5054   if (!col) {
5055     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5056   }
5057   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5058   PetscFunctionReturn(0);
5059 }
5060 
5061 /*@C
5062     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5063 
5064     Collective on Mat
5065 
5066    Input Parameters:
5067 +    A,B - the matrices in mpiaij format
5068 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5069 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5070 
5071    Output Parameter:
5072 +    rowb, colb - index sets of rows and columns of B to extract
5073 -    B_seq - the sequential matrix generated
5074 
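   Notes:
     A rough calling sketch (illustrative only); the index sets created by the first
     call are passed back in when reusing the matrix:

.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... after the numerical values of B change ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
     ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&B_seq);
.ve
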
5075     Level: developer
5076 
5077 @*/
5078 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5079 {
5080   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5081   PetscErrorCode ierr;
5082   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5083   IS             isrowb,iscolb;
5084   Mat            *bseq=NULL;
5085 
5086   PetscFunctionBegin;
5087   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5088     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5089   }
5090   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5091 
5092   if (scall == MAT_INITIAL_MATRIX) {
5093     start = A->cmap->rstart;
5094     cmap  = a->garray;
5095     nzA   = a->A->cmap->n;
5096     nzB   = a->B->cmap->n;
5097     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5098     ncols = 0;
5099     for (i=0; i<nzB; i++) {  /* row < local row index */
5100       if (cmap[i] < start) idx[ncols++] = cmap[i];
5101       else break;
5102     }
5103     imark = i;
5104     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5105     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5106     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5107     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5108   } else {
5109     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5110     isrowb  = *rowb; iscolb = *colb;
5111     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5112     bseq[0] = *B_seq;
5113   }
5114   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5115   *B_seq = bseq[0];
5116   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5117   if (!rowb) {
5118     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5119   } else {
5120     *rowb = isrowb;
5121   }
5122   if (!colb) {
5123     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5124   } else {
5125     *colb = iscolb;
5126   }
5127   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5128   PetscFunctionReturn(0);
5129 }
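
/*
   A sketch of the typical calling sequence for MatGetBrowsOfAcols() above (illustrative only;
   A and B are assumed to be MATMPIAIJ matrices with A's column layout matching B's row layout).
   The index sets created on the first call are returned so they can be passed back for the
   MAT_REUSE_MATRIX case, once the values of B have changed but its nonzero pattern has not:

     IS             rowb = NULL,colb = NULL;
     Mat            B_seq = NULL;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/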
5130 
5131 /*
5132     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
5133     columns of the OFF-DIAGONAL portion of local A
5134 
5135     Collective on Mat
5136 
5137    Input Parameters:
5138 +    A,B - the matrices in mpiaij format
5139 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5140 
5141    Output Parameters:
5142 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5143 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5144 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5145 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5146 
5147     Level: developer
5148 
5149 */
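
/*
   A hedged sketch of how a caller typically drives MatGetBrowsOfAoCols_MPIAIJ() below (the
   surrounding matrices and the reuse pattern are assumptions for illustration).  The
   startsj_s/startsj_r/bufa buffers produced by the MAT_INITIAL_MATRIX call are kept and handed
   back for MAT_REUSE_MATRIX, and are freed by the caller when no longer needed:

     PetscInt       *startsj_s = NULL,*startsj_r = NULL;
     MatScalar      *bufa = NULL;
     Mat            B_oth = NULL;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/
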
5150 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5151 {
5152   VecScatter_MPI_General *gen_to,*gen_from;
5153   PetscErrorCode         ierr;
5154   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5155   Mat_SeqAIJ             *b_oth;
5156   VecScatter             ctx;
5157   MPI_Comm               comm;
5158   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5159   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5160   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5161   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5162   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5163   MPI_Request            *rwaits = NULL,*swaits = NULL;
5164   MPI_Status             *sstatus,rstatus;
5165   PetscMPIInt            jj,size;
5166   VecScatterType         type;
5167   PetscBool              mpi1;
5168 
5169   PetscFunctionBegin;
5170   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5171   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5172 
5173   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5174     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5175   }
5176   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5178 
5179   if (size == 1) {
5180     startsj_s = NULL;
5181     bufa_ptr  = NULL;
5182     *B_oth    = NULL;
5183     PetscFunctionReturn(0);
5184   }
5185 
5186   ctx = a->Mvctx;
5187   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5188   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5189   if (!mpi1) {
5190     /* a->Mvctx is not of type MPI1, the only VecScatter type these Mat-Mat ops support,
5191      so create a->Mvctx_mpi1 */
5192     if (!a->Mvctx_mpi1) {
5193       a->Mvctx_mpi1_flg = PETSC_TRUE;
5194       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5195     }
5196     ctx = a->Mvctx_mpi1;
5197   }
5198   tag = ((PetscObject)ctx)->tag;
5199 
5200   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5201   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5202   nrecvs   = gen_from->n;
5203   nsends   = gen_to->n;
5204 
5205   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5206   srow    = gen_to->indices;    /* local row index to be sent */
5207   sstarts = gen_to->starts;
5208   sprocs  = gen_to->procs;
5209   sstatus = gen_to->sstatus;
5210   sbs     = gen_to->bs;
5211   rstarts = gen_from->starts;
5212   rprocs  = gen_from->procs;
5213   rbs     = gen_from->bs;
5214 
5215   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5216   if (scall == MAT_INITIAL_MATRIX) {
5217     /* i-array */
5218     /*---------*/
5219     /*  post receives */
5220     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5221     for (i=0; i<nrecvs; i++) {
5222       rowlen = rvalues + rstarts[i]*rbs;
5223       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5224       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5225     }
5226 
5227     /* pack the outgoing message */
5228     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5229 
5230     sstartsj[0] = 0;
5231     rstartsj[0] = 0;
5232     len         = 0; /* total length of j or a array to be sent */
5233     k           = 0;
5234     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5235     for (i=0; i<nsends; i++) {
5236       rowlen = svalues + sstarts[i]*sbs;
5237       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5238       for (j=0; j<nrows; j++) {
5239         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5240         for (l=0; l<sbs; l++) {
5241           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5242 
5243           rowlen[j*sbs+l] = ncols;
5244 
5245           len += ncols;
5246           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5247         }
5248         k++;
5249       }
5250       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5251 
5252       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5253     }
5254     /* recvs and sends of i-array are completed */
5255     i = nrecvs;
5256     while (i--) {
5257       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5258     }
5259     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5260     ierr = PetscFree(svalues);CHKERRQ(ierr);
5261 
5262     /* allocate buffers for sending j and a arrays */
5263     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5264     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5265 
5266     /* create i-array of B_oth */
5267     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5268 
5269     b_othi[0] = 0;
5270     len       = 0; /* total length of j or a array to be received */
5271     k         = 0;
5272     for (i=0; i<nrecvs; i++) {
5273       rowlen = rvalues + rstarts[i]*rbs;
5274       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5275       for (j=0; j<nrows; j++) {
5276         b_othi[k+1] = b_othi[k] + rowlen[j];
5277         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5278         k++;
5279       }
5280       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5281     }
5282     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5283 
5284     /* allocate space for the j and a arrays of B_oth */
5285     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5286     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5287 
5288     /* j-array */
5289     /*---------*/
5290     /*  post receives of j-array */
5291     for (i=0; i<nrecvs; i++) {
5292       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5293       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5294     }
5295 
5296     /* pack the outgoing message j-array */
5297     k = 0;
5298     for (i=0; i<nsends; i++) {
5299       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5300       bufJ  = bufj+sstartsj[i];
5301       for (j=0; j<nrows; j++) {
5302         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5303         for (ll=0; ll<sbs; ll++) {
5304           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5305           for (l=0; l<ncols; l++) {
5306             *bufJ++ = cols[l];
5307           }
5308           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5309         }
5310       }
5311       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5312     }
5313 
5314     /* recvs and sends of j-array are completed */
5315     i = nrecvs;
5316     while (i--) {
5317       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5318     }
5319     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5320   } else if (scall == MAT_REUSE_MATRIX) {
5321     sstartsj = *startsj_s;
5322     rstartsj = *startsj_r;
5323     bufa     = *bufa_ptr;
5324     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5325     b_otha   = b_oth->a;
5326   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5327 
5328   /* a-array */
5329   /*---------*/
5330   /*  post receives of a-array */
5331   for (i=0; i<nrecvs; i++) {
5332     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5333     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5334   }
5335 
5336   /* pack the outgoing message a-array */
5337   k = 0;
5338   for (i=0; i<nsends; i++) {
5339     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5340     bufA  = bufa+sstartsj[i];
5341     for (j=0; j<nrows; j++) {
5342       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5343       for (ll=0; ll<sbs; ll++) {
5344         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5345         for (l=0; l<ncols; l++) {
5346           *bufA++ = vals[l];
5347         }
5348         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5349       }
5350     }
5351     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5352   }
5353   /* recvs and sends of a-array are completed */
5354   i = nrecvs;
5355   while (i--) {
5356     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5357   }
5358   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5359   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5360 
5361   if (scall == MAT_INITIAL_MATRIX) {
5362     /* put together the new matrix */
5363     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5364 
5365     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5366     /* Since these are PETSc arrays, change flags to free them as necessary. */
5367     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5368     b_oth->free_a  = PETSC_TRUE;
5369     b_oth->free_ij = PETSC_TRUE;
5370     b_oth->nonew   = 0;
5371 
5372     ierr = PetscFree(bufj);CHKERRQ(ierr);
5373     if (!startsj_s || !bufa_ptr) {
5374       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5375       ierr = PetscFree(bufa);CHKERRQ(ierr);
5376     } else {
5377       *startsj_s = sstartsj;
5378       *startsj_r = rstartsj;
5379       *bufa_ptr  = bufa;
5380     }
5381   }
5382   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5383   PetscFunctionReturn(0);
5384 }
5385 
5386 /*@C
5387   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5388 
5389   Not Collective
5390 
5391   Input Parameter:
5392 . A - The matrix in mpiaij format
5393 
5394   Output Parameters:
5395 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5396 . colmap - A map from global column index to local index into lvec
5397 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5398 
5399   Level: developer
5400 
5401 @*/
5402 #if defined(PETSC_USE_CTABLE)
5403 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5404 #else
5405 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5406 #endif
5407 {
5408   Mat_MPIAIJ *a;
5409 
5410   PetscFunctionBegin;
5411   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5412   PetscValidPointer(lvec, 2);
5413   PetscValidPointer(colmap, 3);
5414   PetscValidPointer(multScatter, 4);
5415   a = (Mat_MPIAIJ*) A->data;
5416   if (lvec) *lvec = a->lvec;
5417   if (colmap) *colmap = a->colmap;
5418   if (multScatter) *multScatter = a->Mvctx;
5419   PetscFunctionReturn(0);
5420 }
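
/*
   A small usage sketch for MatGetCommunicationStructs() above (illustrative; A is assumed to be
   an assembled MATMPIAIJ matrix).  The returned objects are references to the matrix's internal
   data and must not be destroyed by the caller:

     Vec            lvec;
     VecScatter     Mvctx;
     PetscErrorCode ierr;
   #if defined(PETSC_USE_CTABLE)
     PetscTable     colmap;
   #else
     PetscInt       *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/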
5421 
5422 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5423 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5424 #if defined(PETSC_HAVE_MKL_SPARSE)
5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5426 #endif
5427 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5428 #if defined(PETSC_HAVE_ELEMENTAL)
5429 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5430 #endif
5431 #if defined(PETSC_HAVE_HYPRE)
5432 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5433 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5434 #endif
5435 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5436 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5437 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5438 
5439 /*
5440     Computes C = A*B as (B'*A')', i.e. C = (B^T*A^T)^T, since computing the MPIDense*MPIAIJ product A*B directly is untenable
5441 
5442                n                       p                          p
5443         (              )       (              )         (                  )
5444       m (      A       )  *  n (       B      )   =   m (         C        )
5445         (              )       (              )         (                  )
5446 
5447 */
5448 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5449 {
5450   PetscErrorCode ierr;
5451   Mat            At,Bt,Ct;
5452 
5453   PetscFunctionBegin;
5454   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5455   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5456   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5457   ierr = MatDestroy(&At);CHKERRQ(ierr);
5458   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5459   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5460   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5461   PetscFunctionReturn(0);
5462 }
5463 
5464 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5465 {
5466   PetscErrorCode ierr;
5467   PetscInt       m=A->rmap->n,n=B->cmap->n;
5468   Mat            Cmat;
5469 
5470   PetscFunctionBegin;
5471   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5472   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5473   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5474   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5475   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5476   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5477   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5478   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5479 
5480   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5481 
5482   *C = Cmat;
5483   PetscFunctionReturn(0);
5484 }
5485 
5486 /* ----------------------------------------------------------------*/
5487 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5488 {
5489   PetscErrorCode ierr;
5490 
5491   PetscFunctionBegin;
5492   if (scall == MAT_INITIAL_MATRIX) {
5493     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5494     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5495     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5496   }
5497   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5498   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5499   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5500   PetscFunctionReturn(0);
5501 }
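
/*
   A sketch of how the MPIDense*MPIAIJ kernels above are normally reached through the public
   MatMatMult() interface (illustrative; A is assumed to be a MATMPIDENSE matrix and B a MATMPIAIJ
   matrix with compatible sizes).  The first call performs the symbolic and numeric phases; a
   MAT_REUSE_MATRIX call repeats only the numeric phase:

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/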
5502 
5503 /*MC
5504    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5505 
5506    Options Database Keys:
5507 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5508 
5509   Level: beginner
5510 
5511 .seealso: MatCreateAIJ()
5512 M*/
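
/*
   A minimal sketch of creating a MATMPIAIJ matrix (illustrative only; the global sizes M, N and the
   preallocation values are placeholders).  Calling MatSetFromOptions() also allows the type to be
   overridden at run time with -mat_type:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/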
5513 
5514 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5515 {
5516   Mat_MPIAIJ     *b;
5517   PetscErrorCode ierr;
5518   PetscMPIInt    size;
5519 
5520   PetscFunctionBegin;
5521   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5522 
5523   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5524   B->data       = (void*)b;
5525   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5526   B->assembled  = PETSC_FALSE;
5527   B->insertmode = NOT_SET_VALUES;
5528   b->size       = size;
5529 
5530   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5531 
5532   /* build cache for off array entries formed */
5533   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5534 
5535   b->donotstash  = PETSC_FALSE;
5536   b->colmap      = 0;
5537   b->garray      = 0;
5538   b->roworiented = PETSC_TRUE;
5539 
5540   /* stuff used for matrix vector multiply */
5541   b->lvec  = NULL;
5542   b->Mvctx = NULL;
5543 
5544   /* stuff for MatGetRow() */
5545   b->rowindices   = 0;
5546   b->rowvalues    = 0;
5547   b->getrowactive = PETSC_FALSE;
5548 
5549   /* flexible pointer used in CUSP/CUSPARSE classes */
5550   b->spptr = NULL;
5551 
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5559   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5561 #if defined(PETSC_HAVE_MKL_SPARSE)
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5563 #endif
5564   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5565   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5566 #if defined(PETSC_HAVE_ELEMENTAL)
5567   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5568 #endif
5569 #if defined(PETSC_HAVE_HYPRE)
5570   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5571 #endif
5572   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5573   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5574   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5575   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5576   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5577 #if defined(PETSC_HAVE_HYPRE)
5578   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5579 #endif
5580   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5581   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5582   PetscFunctionReturn(0);
5583 }
5584 
5585 /*@C
5586      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5587          and "off-diagonal" part of the matrix in CSR format.
5588 
5589    Collective on MPI_Comm
5590 
5591    Input Parameters:
5592 +  comm - MPI communicator
5593 .  m - number of local rows (Cannot be PETSC_DECIDE)
5594 .  n - This value should be the same as the local size used in creating the
5595        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5596        calculated if N is given). For square matrices n is almost always m.
5597 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5598 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5599 .   i - row indices for "diagonal" portion of matrix
5600 .   j - column indices
5601 .   a - matrix values
5602 .   oi - row indices for "off-diagonal" portion of matrix
5603 .   oj - column indices
5604 -   oa - matrix values
5605 
5606    Output Parameter:
5607 .   mat - the matrix
5608 
5609    Level: advanced
5610 
5611    Notes:
5612        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5613        must free the arrays once the matrix has been destroyed and not before.
5614 
5615        The i and j indices are 0 based
5616 
5617        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5618 
5619        This sets local rows and cannot be used to set off-processor values.
5620 
5621        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5622        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5623        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5624        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5625        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5626        communication if it is known that only local entries will be set.
5627 
5628 .keywords: matrix, aij, compressed row, sparse, parallel
5629 
5630 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5631           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5632 @*/
5633 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5634 {
5635   PetscErrorCode ierr;
5636   Mat_MPIAIJ     *maij;
5637 
5638   PetscFunctionBegin;
5639   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5640   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5641   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5642   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5643   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5644   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5645   maij = (Mat_MPIAIJ*) (*mat)->data;
5646 
5647   (*mat)->preallocated = PETSC_TRUE;
5648 
5649   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5650   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5651 
5652   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5653   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5654 
5655   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5656   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5657   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5658   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5659 
5660   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5661   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5662   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5663   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5664   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5665   PetscFunctionReturn(0);
5666 }
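
/*
   A sketch of the calling shape of MatCreateMPIAIJWithSplitArrays() above (the array names d_i,
   d_j, d_a, o_i, o_j, o_a and the local sizes m, n are placeholders).  Each process supplies the
   CSR description of its local "diagonal" block and of its "off-diagonal" block, and must keep
   the six arrays alive until the matrix has been destroyed:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           d_i,d_j,d_a,o_i,o_j,o_a,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/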
5667 
5668 /*
5669     Special version for direct calls from Fortran
5670 */
5671 #include <petsc/private/fortranimpl.h>
5672 
5673 /* Change these macros so they can be used in a void function */
5674 #undef CHKERRQ
5675 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5676 #undef SETERRQ2
5677 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5678 #undef SETERRQ3
5679 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5680 #undef SETERRQ
5681 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5682 
5683 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5684 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5685 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5686 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5687 #else
5688 #endif
5689 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5690 {
5691   Mat            mat  = *mmat;
5692   PetscInt       m    = *mm, n = *mn;
5693   InsertMode     addv = *maddv;
5694   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5695   PetscScalar    value;
5696   PetscErrorCode ierr;
5697 
5698   MatCheckPreallocated(mat,1);
5699   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5700 
5701 #if defined(PETSC_USE_DEBUG)
5702   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5703 #endif
5704   {
5705     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5706     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5707     PetscBool roworiented = aij->roworiented;
5708 
5709     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros below */
5710     Mat        A                 = aij->A;
5711     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5712     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5713     MatScalar  *aa               = a->a;
5714     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5715     Mat        B                 = aij->B;
5716     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5717     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5718     MatScalar  *ba               = b->a;
5719 
5720     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5721     PetscInt  nonew = a->nonew;
5722     MatScalar *ap1,*ap2;
5723 
5724     PetscFunctionBegin;
5725     for (i=0; i<m; i++) {
5726       if (im[i] < 0) continue;
5727 #if defined(PETSC_USE_DEBUG)
5728       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5729 #endif
5730       if (im[i] >= rstart && im[i] < rend) {
5731         row      = im[i] - rstart;
5732         lastcol1 = -1;
5733         rp1      = aj + ai[row];
5734         ap1      = aa + ai[row];
5735         rmax1    = aimax[row];
5736         nrow1    = ailen[row];
5737         low1     = 0;
5738         high1    = nrow1;
5739         lastcol2 = -1;
5740         rp2      = bj + bi[row];
5741         ap2      = ba + bi[row];
5742         rmax2    = bimax[row];
5743         nrow2    = bilen[row];
5744         low2     = 0;
5745         high2    = nrow2;
5746 
5747         for (j=0; j<n; j++) {
5748           if (roworiented) value = v[i*n+j];
5749           else value = v[i+j*m];
5750           if (in[j] >= cstart && in[j] < cend) {
5751             col = in[j] - cstart;
5752             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5753             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5754           } else if (in[j] < 0) continue;
5755 #if defined(PETSC_USE_DEBUG)
5756           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5757           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5758 #endif
5759           else {
5760             if (mat->was_assembled) {
5761               if (!aij->colmap) {
5762                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5763               }
5764 #if defined(PETSC_USE_CTABLE)
5765               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5766               col--;
5767 #else
5768               col = aij->colmap[in[j]] - 1;
5769 #endif
5770               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5771               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5772                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5773                 col  =  in[j];
5774                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5775                 B     = aij->B;
5776                 b     = (Mat_SeqAIJ*)B->data;
5777                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5778                 rp2   = bj + bi[row];
5779                 ap2   = ba + bi[row];
5780                 rmax2 = bimax[row];
5781                 nrow2 = bilen[row];
5782                 low2  = 0;
5783                 high2 = nrow2;
5784                 bm    = aij->B->rmap->n;
5785                 ba    = b->a;
5786               }
5787             } else col = in[j];
5788             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5789           }
5790         }
5791       } else if (!aij->donotstash) {
5792         if (roworiented) {
5793           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5794         } else {
5795           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5796         }
5797       }
5798     }
5799   }
5800   PetscFunctionReturnVoid();
5801 }
5802 
5803