xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c08c7cb97c8a96079b8cb5b2881afccfb9915bda)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The matrix also automatically
22    switches over to use inodes when enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
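
/*
   Example (a minimal sketch, not part of this file; sizes, names, and preallocation counts are
   illustrative, and the usual ierr/comm declarations are assumed): calling both preallocation
   routines, as recommended above, keeps the same code correct whether the communicator holds
   one process (MatSeqAIJSetPreallocation() applies) or several (MatMPIAIJSetPreallocation()
   applies).  The MatSetType() call may be replaced by -mat_type aij with MatSetFromOptions().

     Mat      A;
     PetscInt mlocal = 10, nlocal = 10;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... set values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/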
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
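
/*
   Rough calling sketch (a hedged example; gmat, mlocal, and pmat are illustrative names, not
   from this file): the first call builds the distributed matrix, and a later call with
   MAT_REUSE_MATRIX and the same layout only re-sends the numerical values from process 0 into
   the already-established sparsity pattern.

     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&pmat);CHKERRQ(ierr);
     ... update the numerical values of gmat on process 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&pmat);CHKERRQ(ierr);
*/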
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the number of diagonal and off-diagonal entries in each row */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the number of diagonal and off-diagonal entries in each row */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash-table cost; without it, it is not scalable (each process
403 has an order-N integer array) but access is fast.  See the lookup sketch after this routine.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
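
/*
   Lookup sketch showing how the colmap built above is consumed (this mirrors the code in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; gcol and lcol are illustrative
   names).  A result of -1 means global column gcol has no entry in the off-diagonal part:

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
*/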
425 
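/*
   The two macros below insert a single value into the local diagonal (A) or off-diagonal (B)
   SeqAIJ block: a short binary search narrows the window for the column, a linear scan locates
   it, existing entries are added to or overwritten, and a genuinely new entry triggers a
   possible reallocation followed by shifting the later entries of the row up one slot.
*/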
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) {
608               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
609                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
610               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
611             }
612           } else col = in[j];
613           nonew = b->nonew;
614           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
615         }
616       }
617     } else {
618       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
619       if (!aij->donotstash) {
620         mat->assembled = PETSC_FALSE;
621         if (roworiented) {
622           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
623         } else {
624           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         }
626       }
627     }
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   PetscErrorCode ierr;
636   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
637   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
638 
639   PetscFunctionBegin;
640   for (i=0; i<m; i++) {
641     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
642     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
643     if (idxm[i] >= rstart && idxm[i] < rend) {
644       row = idxm[i] - rstart;
645       for (j=0; j<n; j++) {
646         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
647         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
648         if (idxn[j] >= cstart && idxn[j] < cend) {
649           col  = idxn[j] - cstart;
650           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
651         } else {
652           if (!aij->colmap) {
653             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
654           }
655 #if defined(PETSC_USE_CTABLE)
656           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
657           col--;
658 #else
659           col = aij->colmap[idxn[j]] - 1;
660 #endif
661           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
662           else {
663             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
664           }
665         }
666       }
667     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
668   }
669   PetscFunctionReturn(0);
670 }
671 
672 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
673 
674 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
675 {
676   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
677   PetscErrorCode ierr;
678   PetscInt       nstash,reallocs;
679 
680   PetscFunctionBegin;
681   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
682 
683   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
684   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
685   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
690 {
691   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
692   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
693   PetscErrorCode ierr;
694   PetscMPIInt    n;
695   PetscInt       i,j,rstart,ncols,flg;
696   PetscInt       *row,*col;
697   PetscBool      other_disassembled;
698   PetscScalar    *val;
699 
700   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
701 
702   PetscFunctionBegin;
703   if (!aij->donotstash && !mat->nooffprocentries) {
704     while (1) {
705       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
706       if (!flg) break;
707 
708       for (i=0; i<n; ) {
709         /* Now identify the consecutive vals belonging to the same row */
710         for (j=i,rstart=row[j]; j<n; j++) {
711           if (row[j] != rstart) break;
712         }
713         if (j < n) ncols = j-i;
714         else       ncols = n-i;
715         /* Now assemble all these values with a single function call */
716         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
717 
718         i = j;
719       }
720     }
721     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
722   }
723   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
724   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
725 
726   /* determine if any process has disassembled; if so, we must also
727      disassemble ourselves so that we may reassemble. */
728   /*
729      if the nonzero structure of submatrix B cannot change, then we know that
730      no process disassembled and thus we can skip this step
731   */
732   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
733     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
734     if (mat->was_assembled && !other_disassembled) {
735       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
736     }
737   }
738   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
739     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
740   }
741   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
742   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
743   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
744 
745   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
746 
747   aij->rowvalues = 0;
748 
749   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
750   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
751 
752   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
753   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
754     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
755     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
756   }
757   PetscFunctionReturn(0);
758 }
759 
760 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
761 {
762   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764 
765   PetscFunctionBegin;
766   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
767   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
772 {
773   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
774   PetscInt      *lrows;
775   PetscInt       r, len;
776   PetscErrorCode ierr;
777 
778   PetscFunctionBegin;
779   /* get locally owned rows */
780   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
781   /* fix right hand side if needed */
782   if (x && b) {
783     const PetscScalar *xx;
784     PetscScalar       *bb;
785 
786     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
788     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
789     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
790     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
791   }
792   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
793   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
794   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
795     PetscBool cong;
796     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
797     if (cong) A->congruentlayouts = 1;
798     else      A->congruentlayouts = 0;
799   }
800   if ((diag != 0.0) && A->congruentlayouts) {
801     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
802   } else if (diag != 0.0) {
803     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
804     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
805     for (r = 0; r < len; ++r) {
806       const PetscInt row = lrows[r] + A->rmap->rstart;
807       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
808     }
809     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
810     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
811   } else {
812     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
813   }
814   ierr = PetscFree(lrows);CHKERRQ(ierr);
815 
816   /* only change matrix nonzero state if pattern was allowed to be changed */
817   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
818     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
819     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
825 {
826   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode    ierr;
828   PetscMPIInt       n = A->rmap->n;
829   PetscInt          i,j,r,m,p = 0,len = 0;
830   PetscInt          *lrows,*owners = A->rmap->range;
831   PetscSFNode       *rrows;
832   PetscSF           sf;
833   const PetscScalar *xx;
834   PetscScalar       *bb,*mask;
835   Vec               xmask,lmask;
836   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
837   const PetscInt    *aj, *ii,*ridx;
838   PetscScalar       *aa;
839 
840   PetscFunctionBegin;
841   /* Create SF where leaves are input rows and roots are owned rows */
842   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
843   for (r = 0; r < n; ++r) lrows[r] = -1;
844   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
845   for (r = 0; r < N; ++r) {
846     const PetscInt idx   = rows[r];
847     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
848     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
849       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
850     }
851     rrows[r].rank  = p;
852     rrows[r].index = rows[r] - owners[p];
853   }
854   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
855   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
856   /* Collect flags for rows to be zeroed */
857   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
858   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
859   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
860   /* Compress and put in row numbers */
861   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
862   /* zero diagonal part of matrix */
863   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
864   /* handle off diagonal part of matrix */
865   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
866   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
867   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
868   for (i=0; i<len; i++) bb[lrows[i]] = 1;
869   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
870   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
873   if (x) {
874     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
877     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
878   }
879   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
880   /* remove zeroed rows of off diagonal matrix */
881   ii = aij->i;
882   for (i=0; i<len; i++) {
883     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
884   }
885   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
886   if (aij->compressedrow.use) {
887     m    = aij->compressedrow.nrows;
888     ii   = aij->compressedrow.i;
889     ridx = aij->compressedrow.rindex;
890     for (i=0; i<m; i++) {
891       n  = ii[i+1] - ii[i];
892       aj = aij->j + ii[i];
893       aa = aij->a + ii[i];
894 
895       for (j=0; j<n; j++) {
896         if (PetscAbsScalar(mask[*aj])) {
897           if (b) bb[*ridx] -= *aa*xx[*aj];
898           *aa = 0.0;
899         }
900         aa++;
901         aj++;
902       }
903       ridx++;
904     }
905   } else { /* do not use compressed row format */
906     m = l->B->rmap->n;
907     for (i=0; i<m; i++) {
908       n  = ii[i+1] - ii[i];
909       aj = aij->j + ii[i];
910       aa = aij->a + ii[i];
911       for (j=0; j<n; j++) {
912         if (PetscAbsScalar(mask[*aj])) {
913           if (b) bb[i] -= *aa*xx[*aj];
914           *aa = 0.0;
915         }
916         aa++;
917         aj++;
918       }
919     }
920   }
921   if (x) {
922     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
923     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
924   }
925   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
926   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
927   ierr = PetscFree(lrows);CHKERRQ(ierr);
928 
929   /* only change matrix nonzero state if pattern was allowed to be changed */
930   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
931     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
932     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
933   }
934   PetscFunctionReturn(0);
935 }
936 
937 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
938 {
939   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
940   PetscErrorCode ierr;
941   PetscInt       nt;
942   VecScatter     Mvctx = a->Mvctx;
943 
944   PetscFunctionBegin;
945   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
946   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
947 
948   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
949   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
950   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
956 {
957   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
958   PetscErrorCode ierr;
959 
960   PetscFunctionBegin;
961   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
966 {
967   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
968   PetscErrorCode ierr;
969   VecScatter     Mvctx = a->Mvctx;
970 
971   PetscFunctionBegin;
972   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
973   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
975   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
981 {
982   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
983   PetscErrorCode ierr;
984   PetscBool      merged;
985 
986   PetscFunctionBegin;
987   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
988   /* do nondiagonal part */
989   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
990   if (!merged) {
991     /* send it on its way */
992     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
993     /* do local part */
994     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
995     /* receive remote parts: note this assumes the values are not actually */
996     /* added into yy until the VecScatterEnd() below */
997     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
998   } else {
999     /* do local part */
1000     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1001     /* send it on its way */
1002     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1003     /* values actually were received in the Begin() but we need to call this nop */
1004     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1010 {
1011   MPI_Comm       comm;
1012   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1013   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1014   IS             Me,Notme;
1015   PetscErrorCode ierr;
1016   PetscInt       M,N,first,last,*notme,i;
1017   PetscMPIInt    size;
1018 
1019   PetscFunctionBegin;
1020   /* Easy test: symmetric diagonal block */
1021   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1022   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1023   if (!*f) PetscFunctionReturn(0);
1024   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1025   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1026   if (size == 1) PetscFunctionReturn(0);
1027 
1028   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrices(). */
1029   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1030   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1031   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1032   for (i=0; i<first; i++) notme[i] = i;
1033   for (i=last; i<M; i++) notme[i-last+first] = i;
1034   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1035   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1036   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1037   Aoff = Aoffs[0];
1038   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1039   Boff = Boffs[0];
1040   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1041   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1042   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1043   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1044   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1045   ierr = PetscFree(notme);CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1050 {
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1059 {
1060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1061   PetscErrorCode ierr;
1062 
1063   PetscFunctionBegin;
1064   /* do nondiagonal part */
1065   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1066   /* send it on its way */
1067   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   /* do local part */
1069   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1070   /* receive remote parts */
1071   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 /*
1076   This only works correctly for square matrices where the subblock A->A is the
1077    diagonal block
1078 */
1079 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1080 {
1081   PetscErrorCode ierr;
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083 
1084   PetscFunctionBegin;
1085   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1086   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1087   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1098   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1103 {
1104   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108 #if defined(PETSC_USE_LOG)
1109   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1110 #endif
1111   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1112   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1113   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1114   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1115 #if defined(PETSC_USE_CTABLE)
1116   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1117 #else
1118   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1119 #endif
1120   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1121   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1122   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1123   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1124   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1125   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1126   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1127 
1128   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1137 #if defined(PETSC_HAVE_ELEMENTAL)
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1139 #endif
1140 #if defined(PETSC_HAVE_HYPRE)
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1143 #endif
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1148 {
1149   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1150   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1151   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1152   PetscErrorCode ierr;
1153   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1154   int            fd;
1155   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1156   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1157   PetscScalar    *column_values;
1158   PetscInt       message_count,flowcontrolcount;
1159   FILE           *file;
1160 
1161   PetscFunctionBegin;
1162   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1163   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1164   nz   = A->nz + B->nz;
1165   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1166   if (!rank) {
1167     header[0] = MAT_FILE_CLASSID;
1168     header[1] = mat->rmap->N;
1169     header[2] = mat->cmap->N;
1170 
1171     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1173     /* get largest number of rows any processor has */
1174     rlen  = mat->rmap->n;
1175     range = mat->rmap->range;
1176     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1177   } else {
1178     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1179     rlen = mat->rmap->n;
1180   }
1181 
1182   /* load up the local row counts */
1183   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1184   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1185 
1186   /* store the row lengths to the file */
1187   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1188   if (!rank) {
1189     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1190     for (i=1; i<size; i++) {
1191       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1192       rlen = range[i+1] - range[i];
1193       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1194       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1195     }
1196     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1197   } else {
1198     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1199     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1200     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1201   }
1202   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1203 
1204   /* load up the local column indices */
1205   nzmax = nz; /* process 0 needs as much space as the largest process needs */
1206   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1207   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1208   cnt   = 0;
1209   for (i=0; i<mat->rmap->n; i++) {
1210     for (j=B->i[i]; j<B->i[i+1]; j++) {
1211       if ((col = garray[B->j[j]]) > cstart) break;
1212       column_indices[cnt++] = col;
1213     }
1214     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1215     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1216   }
1217   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1218 
1219   /* store the column indices to the file */
1220   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1221   if (!rank) {
1222     MPI_Status status;
1223     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1224     for (i=1; i<size; i++) {
1225       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1226       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1227       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1228       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1229       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     }
1231     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1232   } else {
1233     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1234     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1235     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1237   }
1238   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1239 
1240   /* load up the local column values */
1241   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1242   cnt  = 0;
1243   for (i=0; i<mat->rmap->n; i++) {
1244     for (j=B->i[i]; j<B->i[i+1]; j++) {
1245       if (garray[B->j[j]] > cstart) break;
1246       column_values[cnt++] = B->a[j];
1247     }
1248     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1249     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1250   }
1251   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1252 
1253   /* store the column values to the file */
1254   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1255   if (!rank) {
1256     MPI_Status status;
1257     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1258     for (i=1; i<size; i++) {
1259       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1260       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1261       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1262       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1264     }
1265     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1266   } else {
1267     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1268     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1271   }
1272   ierr = PetscFree(column_values);CHKERRQ(ierr);
1273 
1274   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1275   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #include <petscdraw.h>
1280 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1281 {
1282   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1283   PetscErrorCode    ierr;
1284   PetscMPIInt       rank = aij->rank,size = aij->size;
1285   PetscBool         isdraw,iascii,isbinary;
1286   PetscViewer       sviewer;
1287   PetscViewerFormat format;
1288 
1289   PetscFunctionBegin;
1290   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1291   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1292   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1293   if (iascii) {
1294     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1295     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1296       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1297       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1298       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1299       for (i=0; i<(PetscInt)size; i++) {
1300         nmax = PetscMax(nmax,nz[i]);
1301         nmin = PetscMin(nmin,nz[i]);
1302         navg += nz[i];
1303       }
1304       ierr = PetscFree(nz);CHKERRQ(ierr);
1305       navg = navg/size;
1306       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1307       PetscFunctionReturn(0);
1308     }
1309     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1310     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1311       MatInfo   info;
1312       PetscBool inodes;
1313 
1314       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1315       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1317       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1318       if (!inodes) {
1319         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1320                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1321       } else {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1324       }
1325       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1326       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1327       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1329       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1332       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1333       PetscFunctionReturn(0);
1334     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1335       PetscInt inodecount,inodelimit,*inodes;
1336       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1337       if (inodes) {
1338         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1339       } else {
1340         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1341       }
1342       PetscFunctionReturn(0);
1343     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1344       PetscFunctionReturn(0);
1345     }
1346   } else if (isbinary) {
1347     if (size == 1) {
1348       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1349       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1350     } else {
1351       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1352     }
1353     PetscFunctionReturn(0);
1354   } else if (isdraw) {
1355     PetscDraw draw;
1356     PetscBool isnull;
1357     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1358     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1359     if (isnull) PetscFunctionReturn(0);
1360   }
1361 
1362   {
1363     /* assemble the entire matrix onto the first processor */
1364     Mat        A;
1365     Mat_SeqAIJ *Aloc;
1366     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1367     MatScalar  *a;
1368 
1369     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1370     if (!rank) {
1371       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1372     } else {
1373       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1374     }
1375     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1376     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1377     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1378     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1379     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1380 
1381     /* copy over the A part */
1382     Aloc = (Mat_SeqAIJ*)aij->A->data;
1383     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1384     row  = mat->rmap->rstart;
1385     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1386     for (i=0; i<m; i++) {
1387       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1388       row++;
1389       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1390     }
1391     aj = Aloc->j;
1392     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1393 
1394     /* copy over the B part */
1395     Aloc = (Mat_SeqAIJ*)aij->B->data;
1396     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1397     row  = mat->rmap->rstart;
1398     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1399     ct   = cols;
1400     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1401     for (i=0; i<m; i++) {
1402       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1403       row++;
1404       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1405     }
1406     ierr = PetscFree(ct);CHKERRQ(ierr);
1407     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1408     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1409     /*
1410        Everyone has to call to draw the matrix since the graphics waits are
1411        synchronized across all processors that share the PetscDraw object
1412     */
1413     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1414     if (!rank) {
1415       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1416       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1417     }
1418     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1419     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1420     ierr = MatDestroy(&A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1426 {
1427   PetscErrorCode ierr;
1428   PetscBool      iascii,isdraw,issocket,isbinary;
1429 
1430   PetscFunctionBegin;
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1435   if (iascii || isdraw || isbinary || issocket) {
1436     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1442 {
1443   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1444   PetscErrorCode ierr;
1445   Vec            bb1 = 0;
1446   PetscBool      hasop;
1447 
1448   PetscFunctionBegin;
1449   if (flag == SOR_APPLY_UPPER) {
1450     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1451     PetscFunctionReturn(0);
1452   }
1453 
1454   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1455     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1456   }
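  /* Each local relaxation below acts on the diagonal block A only; the coupling to
     off-process unknowns is first folded into the right-hand side, bb1 = bb - B*x_ghost,
     where x_ghost are the ghost values of xx gathered into mat->lvec. */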
1457 
1458   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461       its--;
1462     }
1463 
1464     while (its--) {
1465       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467 
1468       /* update rhs: bb1 = bb - B*x */
1469       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1470       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1471 
1472       /* local sweep */
1473       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1474     }
1475   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1476     if (flag & SOR_ZERO_INITIAL_GUESS) {
1477       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1478       its--;
1479     }
1480     while (its--) {
1481       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483 
1484       /* update rhs: bb1 = bb - B*x */
1485       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1486       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1487 
1488       /* local sweep */
1489       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1490     }
1491   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1492     if (flag & SOR_ZERO_INITIAL_GUESS) {
1493       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1494       its--;
1495     }
1496     while (its--) {
1497       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499 
1500       /* update rhs: bb1 = bb - B*x */
1501       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1502       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1503 
1504       /* local sweep */
1505       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1506     }
1507   } else if (flag & SOR_EISENSTAT) {
1508     Vec xx1;
1509 
1510     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1511     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1512 
1513     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515     if (!mat->diag) {
1516       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1517       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1518     }
1519     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1520     if (hasop) {
1521       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1522     } else {
1523       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1524     }
1525     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1526 
1527     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1528 
1529     /* local sweep */
1530     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1531     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1532     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1533   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1534 
1535   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1536 
1537   matin->factorerrortype = mat->A->factorerrortype;
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1542 {
1543   Mat            aA,aB,Aperm;
1544   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1545   PetscScalar    *aa,*ba;
1546   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1547   PetscSF        rowsf,sf;
1548   IS             parcolp = NULL;
1549   PetscBool      done;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1554   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1555   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1556   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
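  /* rowp and colp give, for each permuted position, the original global index that should
     land there; reducing each locally owned index through an SF built on those wants inverts
     the permutation, producing in rdest/cdest the destination of every owned row and column. */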
1557 
1558   /* Invert row permutation to find out where my rows should go */
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1562   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1563   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1564   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1565 
1566   /* Invert column permutation to find out where my columns should go */
1567   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1568   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1569   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1570   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1571   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1574 
1575   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1576   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1577   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1578 
1579   /* Find out where my gcols should go */
1580   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1581   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1583   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1584   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1585   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1588 
1589   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1590   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1591   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) {
1593     PetscInt row = rdest[i],rowner;
1594     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1595     for (j=ai[i]; j<ai[i+1]; j++) {
1596       PetscInt cowner,col = cdest[aj[j]];
1597       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1598       if (rowner == cowner) dnnz[i]++;
1599       else onnz[i]++;
1600     }
1601     for (j=bi[i]; j<bi[i+1]; j++) {
1602       PetscInt cowner,col = gcdest[bj[j]];
1603       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1604       if (rowner == cowner) dnnz[i]++;
1605       else onnz[i]++;
1606     }
1607   }
1608   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1609   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1610   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1612   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1613 
1614   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1615   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1616   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1617   for (i=0; i<m; i++) {
1618     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1619     PetscInt j0,rowlen;
1620     rowlen = ai[i+1] - ai[i];
1621     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m (the size of the scratch arrays), so insert the values in batches of at most m */
1622       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1623       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1624     }
1625     rowlen = bi[i+1] - bi[i];
1626     for (j0=j=0; j<rowlen; j0=j) {
1627       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1628       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1633   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1634   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1635   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1637   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1638   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1639   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1640   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1641   *B = Aperm;
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1646 {
1647   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1648   PetscErrorCode ierr;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1652   if (ghosts) *ghosts = aij->garray;
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1657 {
1658   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1659   Mat            A    = mat->A,B = mat->B;
1660   PetscErrorCode ierr;
1661   PetscReal      isend[5],irecv[5];
1662 
1663   PetscFunctionBegin;
1664   info->block_size = 1.0;
1665   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1666 
1667   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1668   isend[3] = info->memory;  isend[4] = info->mallocs;
1669 
1670   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1671 
1672   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1673   isend[3] += info->memory;  isend[4] += info->mallocs;
1674   if (flag == MAT_LOCAL) {
1675     info->nz_used      = isend[0];
1676     info->nz_allocated = isend[1];
1677     info->nz_unneeded  = isend[2];
1678     info->memory       = isend[3];
1679     info->mallocs      = isend[4];
1680   } else if (flag == MAT_GLOBAL_MAX) {
1681     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1682 
1683     info->nz_used      = irecv[0];
1684     info->nz_allocated = irecv[1];
1685     info->nz_unneeded  = irecv[2];
1686     info->memory       = irecv[3];
1687     info->mallocs      = irecv[4];
1688   } else if (flag == MAT_GLOBAL_SUM) {
1689     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1690 
1691     info->nz_used      = irecv[0];
1692     info->nz_allocated = irecv[1];
1693     info->nz_unneeded  = irecv[2];
1694     info->memory       = irecv[3];
1695     info->mallocs      = irecv[4];
1696   }
1697   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1698   info->fill_ratio_needed = 0;
1699   info->factor_mallocs    = 0;
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1704 {
1705   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   switch (op) {
1710   case MAT_NEW_NONZERO_LOCATIONS:
1711   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1712   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1713   case MAT_KEEP_NONZERO_PATTERN:
1714   case MAT_NEW_NONZERO_LOCATION_ERR:
1715   case MAT_USE_INODES:
1716   case MAT_IGNORE_ZERO_ENTRIES:
1717     MatCheckPreallocated(A,1);
1718     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1719     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1720     break;
1721   case MAT_ROW_ORIENTED:
1722     MatCheckPreallocated(A,1);
1723     a->roworiented = flg;
1724 
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1727     break;
1728   case MAT_NEW_DIAGONALS:
1729     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1730     break;
1731   case MAT_IGNORE_OFF_PROC_ENTRIES:
1732     a->donotstash = flg;
1733     break;
1734   case MAT_SPD:
1735     A->spd_set = PETSC_TRUE;
1736     A->spd     = flg;
1737     if (flg) {
1738       A->symmetric                  = PETSC_TRUE;
1739       A->structurally_symmetric     = PETSC_TRUE;
1740       A->symmetric_set              = PETSC_TRUE;
1741       A->structurally_symmetric_set = PETSC_TRUE;
1742     }
1743     break;
1744   case MAT_SYMMETRIC:
1745     MatCheckPreallocated(A,1);
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_STRUCTURALLY_SYMMETRIC:
1749     MatCheckPreallocated(A,1);
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     break;
1752   case MAT_HERMITIAN:
1753     MatCheckPreallocated(A,1);
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_SYMMETRY_ETERNAL:
1757     MatCheckPreallocated(A,1);
1758     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_SUBMAT_SINGLEIS:
1761     A->submat_singleis = flg;
1762     break;
1763   case MAT_STRUCTURE_ONLY:
1764     /* The option is handled directly by MatSetOption() */
1765     break;
1766   default:
1767     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1768   }
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1773 {
1774   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1775   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1776   PetscErrorCode ierr;
1777   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1778   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1779   PetscInt       *cmap,*idx_p;
1780 
1781   PetscFunctionBegin;
1782   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1783   mat->getrowactive = PETSC_TRUE;
1784 
1785   if (!mat->rowvalues && (idx || v)) {
1786     /*
1787         allocate enough space to hold information from the longest row.
1788     */
1789     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1790     PetscInt   max = 1,tmp;
1791     for (i=0; i<matin->rmap->n; i++) {
1792       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1793       if (max < tmp) max = tmp;
1794     }
1795     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1796   }
1797 
1798   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1799   lrow = row - rstart;
1800 
1801   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1802   if (!v)   {pvA = 0; pvB = 0;}
1803   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1804   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1805   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1806   nztot = nzA + nzB;
1807 
1808   cmap = mat->garray;
1809   if (v  || idx) {
1810     if (nztot) {
1811       /* Merge into increasing column order, assuming the A and B parts are each already sorted */
1812       PetscInt imark = -1;
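      /* imark counts the off-diagonal (B) entries of this row whose global column lies
         before the diagonal block; those entries come first in the merged row. */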
1813       if (v) {
1814         *v = v_p = mat->rowvalues;
1815         for (i=0; i<nzB; i++) {
1816           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1817           else break;
1818         }
1819         imark = i;
1820         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1821         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1822       }
1823       if (idx) {
1824         *idx = idx_p = mat->rowindices;
1825         if (imark > -1) {
1826           for (i=0; i<imark; i++) {
1827             idx_p[i] = cmap[cworkB[i]];
1828           }
1829         } else {
1830           for (i=0; i<nzB; i++) {
1831             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1832             else break;
1833           }
1834           imark = i;
1835         }
1836         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1837         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1838       }
1839     } else {
1840       if (idx) *idx = 0;
1841       if (v)   *v   = 0;
1842     }
1843   }
1844   *nz  = nztot;
1845   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1846   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1851 {
1852   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1853 
1854   PetscFunctionBegin;
1855   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1856   aij->getrowactive = PETSC_FALSE;
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1861 {
1862   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1863   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1864   PetscErrorCode ierr;
1865   PetscInt       i,j,cstart = mat->cmap->rstart;
1866   PetscReal      sum = 0.0;
1867   MatScalar      *v;
1868 
1869   PetscFunctionBegin;
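  /* NORM_FROBENIUS: sqrt(sum_{i,j} |a_ij|^2); NORM_1: max_j sum_i |a_ij| (largest column sum);
     NORM_INFINITY: max_i sum_j |a_ij| (largest row sum). Each is accumulated from the local
     diagonal (A) and off-diagonal (B) blocks and then combined across processes with an
     MPI reduction. */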
1870   if (aij->size == 1) {
1871     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1872   } else {
1873     if (type == NORM_FROBENIUS) {
1874       v = amat->a;
1875       for (i=0; i<amat->nz; i++) {
1876         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1877       }
1878       v = bmat->a;
1879       for (i=0; i<bmat->nz; i++) {
1880         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1881       }
1882       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1883       *norm = PetscSqrtReal(*norm);
1884       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1885     } else if (type == NORM_1) { /* max column norm */
1886       PetscReal *tmp,*tmp2;
1887       PetscInt  *jj,*garray = aij->garray;
1888       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1889       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1890       *norm = 0.0;
1891       v     = amat->a; jj = amat->j;
1892       for (j=0; j<amat->nz; j++) {
1893         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1894       }
1895       v = bmat->a; jj = bmat->j;
1896       for (j=0; j<bmat->nz; j++) {
1897         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1898       }
1899       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1900       for (j=0; j<mat->cmap->N; j++) {
1901         if (tmp2[j] > *norm) *norm = tmp2[j];
1902       }
1903       ierr = PetscFree(tmp);CHKERRQ(ierr);
1904       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1905       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1906     } else if (type == NORM_INFINITY) { /* max row norm */
1907       PetscReal ntemp = 0.0;
1908       for (j=0; j<aij->A->rmap->n; j++) {
1909         v   = amat->a + amat->i[j];
1910         sum = 0.0;
1911         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1912           sum += PetscAbsScalar(*v); v++;
1913         }
1914         v = bmat->a + bmat->i[j];
1915         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1916           sum += PetscAbsScalar(*v); v++;
1917         }
1918         if (sum > ntemp) ntemp = sum;
1919       }
1920       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1921       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1922     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1923   }
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1928 {
1929   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1930   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1931   PetscErrorCode ierr;
1932   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1933   PetscInt       cstart = A->cmap->rstart,ncol;
1934   Mat            B;
1935   MatScalar      *array;
1936 
1937   PetscFunctionBegin;
1938   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1939 
1940   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1941   ai = Aloc->i; aj = Aloc->j;
1942   bi = Bloc->i; bj = Bloc->j;
1943   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1944     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1945     PetscSFNode          *oloc;
1946     PETSC_UNUSED PetscSF sf;
1947 
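    /* Preallocation for the transpose: the count of entries in each local column of the
       diagonal block gives d_nnz directly, while the per-ghost-column counts of the
       off-diagonal block (g_nnz) are reduced onto the processes that own those columns
       with a PetscSF to obtain o_nnz. */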
1948     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1949     /* compute d_nnz for preallocation */
1950     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1951     for (i=0; i<ai[ma]; i++) {
1952       d_nnz[aj[i]]++;
1953       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1954     }
1955     /* compute local off-diagonal contributions */
1956     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1957     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1958     /* map those to global */
1959     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1960     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1961     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1962     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1963     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1964     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1965     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1966 
1967     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1968     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1969     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1970     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1971     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1972     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1973   } else {
1974     B    = *matout;
1975     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1976     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1977   }
1978 
1979   /* copy over the A part */
1980   array = Aloc->a;
1981   row   = A->rmap->rstart;
1982   for (i=0; i<ma; i++) {
1983     ncol = ai[i+1]-ai[i];
1984     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1985     row++;
1986     array += ncol; aj += ncol;
1987   }
1988   aj = Aloc->j;
1989   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1990 
1991   /* copy over the B part */
1992   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1993   array = Bloc->a;
1994   row   = A->rmap->rstart;
1995   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1996   cols_tmp = cols;
1997   for (i=0; i<mb; i++) {
1998     ncol = bi[i+1]-bi[i];
1999     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2000     row++;
2001     array += ncol; cols_tmp += ncol;
2002   }
2003   ierr = PetscFree(cols);CHKERRQ(ierr);
2004 
2005   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2006   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2007   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2008     *matout = B;
2009   } else {
2010     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2011   }
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2016 {
2017   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2018   Mat            a    = aij->A,b = aij->B;
2019   PetscErrorCode ierr;
2020   PetscInt       s1,s2,s3;
2021 
2022   PetscFunctionBegin;
2023   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
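  /* Computes diag(ll)*mat*diag(rr): the diagonal block a sees the local parts of ll and rr,
     while the off-diagonal block b needs the ghost entries of rr, which are gathered into
     aij->lvec while b is being left-scaled by ll. */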
2024   if (rr) {
2025     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2026     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2027     /* Overlap communication with computation. */
2028     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2029   }
2030   if (ll) {
2031     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2032     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2033     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2034   }
2035   /* scale the diagonal block */
2036   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2037 
2038   if (rr) {
2039     /* Do a scatter end and then right scale the off-diagonal block */
2040     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2041     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2047 {
2048   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2049   PetscErrorCode ierr;
2050 
2051   PetscFunctionBegin;
2052   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2057 {
2058   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2059   Mat            a,b,c,d;
2060   PetscBool      flg;
2061   PetscErrorCode ierr;
2062 
2063   PetscFunctionBegin;
2064   a = matA->A; b = matA->B;
2065   c = matB->A; d = matB->B;
2066 
2067   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2068   if (flg) {
2069     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2070   }
2071   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2076 {
2077   PetscErrorCode ierr;
2078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2079   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2080 
2081   PetscFunctionBegin;
2082   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2083   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2084     /* because of the column compression in the off-processor part of the matrix a->B,
2085        the number of columns in a->B and b->B may differ, hence we cannot call
2086        MatCopy() directly on the two parts. If need be, a copy more efficient than
2087        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2088        then copying the submatrices */
2089     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2090   } else {
2091     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2092     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2093   }
2094   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2099 {
2100   PetscErrorCode ierr;
2101 
2102   PetscFunctionBegin;
2103   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 /*
2108    Computes the number of nonzeros per row needed for preallocation when X and Y
2109    have different nonzero structure.
2110 */
2111 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2112 {
2113   PetscInt       i,j,k,nzx,nzy;
2114 
2115   PetscFunctionBegin;
2116   /* Set the number of nonzeros in the new matrix */
2117   for (i=0; i<m; i++) {
2118     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2119     nzx = xi[i+1] - xi[i];
2120     nzy = yi[i+1] - yi[i];
2121     nnz[i] = 0;
2122     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2123       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2124       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2125       nnz[i]++;
2126     }
2127     for (; k<nzy; k++) nnz[i]++;
2128   }
2129   PetscFunctionReturn(0);
2130 }
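/*
   A small worked example (illustration only): for one row with global columns
     X: {0, 3, 7}   and   Y: {3, 5}
   the merged pattern is {0, 3, 5, 7}, so nnz[i] = 4; the duplicate column 3 is counted once,
   which is what the "Skip duplicate" branch above guarantees.
*/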
2131 
2132 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2133 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2134 {
2135   PetscErrorCode ierr;
2136   PetscInt       m = Y->rmap->N;
2137   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2138   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2146 {
2147   PetscErrorCode ierr;
2148   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2149   PetscBLASInt   bnz,one=1;
2150   Mat_SeqAIJ     *x,*y;
2151 
2152   PetscFunctionBegin;
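  /* When the nonzero patterns are identical, Y += a*X reduces to a BLAS axpy on the stored
     value arrays of the diagonal and off-diagonal blocks; otherwise a matrix with the merged
     pattern is preallocated (row counts from MatAXPYGetPreallocation_*) and filled with the
     basic AXPY kernel, after which it replaces Y. */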
2153   if (str == SAME_NONZERO_PATTERN) {
2154     PetscScalar alpha = a;
2155     x    = (Mat_SeqAIJ*)xx->A->data;
2156     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2157     y    = (Mat_SeqAIJ*)yy->A->data;
2158     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2159     x    = (Mat_SeqAIJ*)xx->B->data;
2160     y    = (Mat_SeqAIJ*)yy->B->data;
2161     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2162     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2163     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2164   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2165     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2166   } else {
2167     Mat      B;
2168     PetscInt *nnz_d,*nnz_o;
2169     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2170     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2171     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2172     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2173     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2174     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2175     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2176     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2177     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2178     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2179     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2180     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2181     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2182     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2183   }
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2188 
2189 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2190 {
2191 #if defined(PETSC_USE_COMPLEX)
2192   PetscErrorCode ierr;
2193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2194 
2195   PetscFunctionBegin;
2196   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2197   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2198 #else
2199   PetscFunctionBegin;
2200 #endif
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2205 {
2206   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2207   PetscErrorCode ierr;
2208 
2209   PetscFunctionBegin;
2210   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2211   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2216 {
2217   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2218   PetscErrorCode ierr;
2219 
2220   PetscFunctionBegin;
2221   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2222   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2227 {
2228   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2229   PetscErrorCode ierr;
2230   PetscInt       i,*idxb = 0;
2231   PetscScalar    *va,*vb;
2232   Vec            vtmp;
2233 
2234   PetscFunctionBegin;
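  /* The row maxima of the diagonal block are computed first; they are then compared against
     the row maxima of the off-diagonal block, and winning off-diagonal indices are mapped to
     global column numbers through a->garray. */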
2235   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2236   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2237   if (idx) {
2238     for (i=0; i<A->rmap->n; i++) {
2239       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2240     }
2241   }
2242 
2243   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2244   if (idx) {
2245     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2246   }
2247   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2248   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2249 
2250   for (i=0; i<A->rmap->n; i++) {
2251     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2252       va[i] = vb[i];
2253       if (idx) idx[i] = a->garray[idxb[i]];
2254     }
2255   }
2256 
2257   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2258   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2259   ierr = PetscFree(idxb);CHKERRQ(ierr);
2260   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2265 {
2266   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2267   PetscErrorCode ierr;
2268   PetscInt       i,*idxb = 0;
2269   PetscScalar    *va,*vb;
2270   Vec            vtmp;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2274   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2275   if (idx) {
2276     for (i=0; i<A->rmap->n; i++) {
2277       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2278     }
2279   }
2280 
2281   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2282   if (idx) {
2283     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2284   }
2285   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2286   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2287 
2288   for (i=0; i<A->rmap->n; i++) {
2289     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2290       va[i] = vb[i];
2291       if (idx) idx[i] = a->garray[idxb[i]];
2292     }
2293   }
2294 
2295   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2296   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2297   ierr = PetscFree(idxb);CHKERRQ(ierr);
2298   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2303 {
2304   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2305   PetscInt       n      = A->rmap->n;
2306   PetscInt       cstart = A->cmap->rstart;
2307   PetscInt       *cmap  = mat->garray;
2308   PetscInt       *diagIdx, *offdiagIdx;
2309   Vec            diagV, offdiagV;
2310   PetscScalar    *a, *diagA, *offdiagA;
2311   PetscInt       r;
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2316   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2317   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2318   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2319   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2320   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2321   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2322   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2323   for (r = 0; r < n; ++r) {
2324     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2325       a[r]   = diagA[r];
2326       idx[r] = cstart + diagIdx[r];
2327     } else {
2328       a[r]   = offdiagA[r];
2329       idx[r] = cmap[offdiagIdx[r]];
2330     }
2331   }
2332   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2333   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2334   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2335   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2336   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2337   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2342 {
2343   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2344   PetscInt       n      = A->rmap->n;
2345   PetscInt       cstart = A->cmap->rstart;
2346   PetscInt       *cmap  = mat->garray;
2347   PetscInt       *diagIdx, *offdiagIdx;
2348   Vec            diagV, offdiagV;
2349   PetscScalar    *a, *diagA, *offdiagA;
2350   PetscInt       r;
2351   PetscErrorCode ierr;
2352 
2353   PetscFunctionBegin;
2354   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2355   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2356   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2357   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2358   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2359   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2360   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2361   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2362   for (r = 0; r < n; ++r) {
2363     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2364       a[r]   = diagA[r];
2365       idx[r] = cstart + diagIdx[r];
2366     } else {
2367       a[r]   = offdiagA[r];
2368       idx[r] = cmap[offdiagIdx[r]];
2369     }
2370   }
2371   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2373   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2374   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2375   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2376   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2381 {
2382   PetscErrorCode ierr;
2383   Mat            *dummy;
2384 
2385   PetscFunctionBegin;
2386   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2387   *newmat = *dummy;
2388   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2393 {
2394   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2395   PetscErrorCode ierr;
2396 
2397   PetscFunctionBegin;
2398   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2399   A->factorerrortype = a->A->factorerrortype;
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2404 {
2405   PetscErrorCode ierr;
2406   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2407 
2408   PetscFunctionBegin;
2409   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2410   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2411   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2412   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2417 {
2418   PetscFunctionBegin;
2419   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2420   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 /*@
2425    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2426 
2427    Collective on Mat
2428 
2429    Input Parameters:
2430 +    A - the matrix
2431 -    sc - PETSC_TRUE to use the scalable algorithm (the default is the non-scalable algorithm)
2432 
2433    Level: advanced
2434 
2435 @*/
2436 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2437 {
2438   PetscErrorCode       ierr;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
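/*
   A minimal usage sketch (illustration only, not taken from this file; the variable names are
   hypothetical): turn on the scalable algorithm for an assembled MATMPIAIJ matrix before
   increasing the overlap of a set of index sets.

     Mat            A;        already assembled MATMPIAIJ matrix
     IS             is[1];    index set(s) whose overlap is to be increased
     PetscErrorCode ierr;

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,1,is,1);CHKERRQ(ierr);

   The same choice can be made from the command line with -mat_increase_overlap_scalable
   (see MatSetFromOptions_MPIAIJ below).
*/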
2444 
2445 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2446 {
2447   PetscErrorCode       ierr;
2448   PetscBool            sc = PETSC_FALSE,flg;
2449 
2450   PetscFunctionBegin;
2451   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2452   ierr = PetscObjectOptionsBegin((PetscObject)A);
2453     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2454     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2455     if (flg) {
2456       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2457     }
2458   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2463 {
2464   PetscErrorCode ierr;
2465   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2466   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2467 
2468   PetscFunctionBegin;
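  /* Make sure the diagonal entries can be inserted before shifting: if Y was never
     preallocated, reserve one entry per row now; if it was preallocated but still holds no
     nonzeros, re-preallocate the diagonal block while preserving its nonew setting. */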
2469   if (!Y->preallocated) {
2470     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2471   } else if (!aij->nz) {
2472     PetscInt nonew = aij->nonew;
2473     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2474     aij->nonew = nonew;
2475   }
2476   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2481 {
2482   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2483   PetscErrorCode ierr;
2484 
2485   PetscFunctionBegin;
2486   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2487   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2488   if (d) {
2489     PetscInt rstart;
2490     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
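    /* translate the local index of the missing diagonal entry into a global row number */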
2491     *d += rstart;
2492 
2493   }
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 
2498 /* -------------------------------------------------------------------*/
2499 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2500                                        MatGetRow_MPIAIJ,
2501                                        MatRestoreRow_MPIAIJ,
2502                                        MatMult_MPIAIJ,
2503                                 /* 4*/ MatMultAdd_MPIAIJ,
2504                                        MatMultTranspose_MPIAIJ,
2505                                        MatMultTransposeAdd_MPIAIJ,
2506                                        0,
2507                                        0,
2508                                        0,
2509                                 /*10*/ 0,
2510                                        0,
2511                                        0,
2512                                        MatSOR_MPIAIJ,
2513                                        MatTranspose_MPIAIJ,
2514                                 /*15*/ MatGetInfo_MPIAIJ,
2515                                        MatEqual_MPIAIJ,
2516                                        MatGetDiagonal_MPIAIJ,
2517                                        MatDiagonalScale_MPIAIJ,
2518                                        MatNorm_MPIAIJ,
2519                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2520                                        MatAssemblyEnd_MPIAIJ,
2521                                        MatSetOption_MPIAIJ,
2522                                        MatZeroEntries_MPIAIJ,
2523                                 /*24*/ MatZeroRows_MPIAIJ,
2524                                        0,
2525                                        0,
2526                                        0,
2527                                        0,
2528                                 /*29*/ MatSetUp_MPIAIJ,
2529                                        0,
2530                                        0,
2531                                        MatGetDiagonalBlock_MPIAIJ,
2532                                        0,
2533                                 /*34*/ MatDuplicate_MPIAIJ,
2534                                        0,
2535                                        0,
2536                                        0,
2537                                        0,
2538                                 /*39*/ MatAXPY_MPIAIJ,
2539                                        MatCreateSubMatrices_MPIAIJ,
2540                                        MatIncreaseOverlap_MPIAIJ,
2541                                        MatGetValues_MPIAIJ,
2542                                        MatCopy_MPIAIJ,
2543                                 /*44*/ MatGetRowMax_MPIAIJ,
2544                                        MatScale_MPIAIJ,
2545                                        MatShift_MPIAIJ,
2546                                        MatDiagonalSet_MPIAIJ,
2547                                        MatZeroRowsColumns_MPIAIJ,
2548                                 /*49*/ MatSetRandom_MPIAIJ,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                        0,
2553                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2554                                        0,
2555                                        MatSetUnfactored_MPIAIJ,
2556                                        MatPermute_MPIAIJ,
2557                                        0,
2558                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2559                                        MatDestroy_MPIAIJ,
2560                                        MatView_MPIAIJ,
2561                                        0,
2562                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2563                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2564                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2565                                        0,
2566                                        0,
2567                                        0,
2568                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2569                                        MatGetRowMinAbs_MPIAIJ,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                 /*75*/ MatFDColoringApply_AIJ,
2575                                        MatSetFromOptions_MPIAIJ,
2576                                        0,
2577                                        0,
2578                                        MatFindZeroDiagonals_MPIAIJ,
2579                                 /*80*/ 0,
2580                                        0,
2581                                        0,
2582                                 /*83*/ MatLoad_MPIAIJ,
2583                                        MatIsSymmetric_MPIAIJ,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2589                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2590                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2591                                        MatPtAP_MPIAIJ_MPIAIJ,
2592                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2593                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*99*/ 0,
2599                                        0,
2600                                        0,
2601                                        MatConjugate_MPIAIJ,
2602                                        0,
2603                                 /*104*/MatSetValuesRow_MPIAIJ,
2604                                        MatRealPart_MPIAIJ,
2605                                        MatImaginaryPart_MPIAIJ,
2606                                        0,
2607                                        0,
2608                                 /*109*/0,
2609                                        0,
2610                                        MatGetRowMin_MPIAIJ,
2611                                        0,
2612                                        MatMissingDiagonal_MPIAIJ,
2613                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2614                                        0,
2615                                        MatGetGhosts_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                 /*119*/0,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                        MatGetMultiProcBlock_MPIAIJ,
2623                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2624                                        MatGetColumnNorms_MPIAIJ,
2625                                        MatInvertBlockDiagonal_MPIAIJ,
2626                                        0,
2627                                        MatCreateSubMatricesMPI_MPIAIJ,
2628                                 /*129*/0,
2629                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2630                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2631                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2632                                        0,
2633                                 /*134*/0,
2634                                        0,
2635                                        MatRARt_MPIAIJ_MPIAIJ,
2636                                        0,
2637                                        0,
2638                                 /*139*/MatSetBlockSizes_MPIAIJ,
2639                                        0,
2640                                        0,
2641                                        MatFDColoringSetUp_MPIXAIJ,
2642                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2643                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2644 };
2645 
2646 /* ----------------------------------------------------------------------------------------*/
2647 
2648 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2649 {
2650   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2651   PetscErrorCode ierr;
2652 
2653   PetscFunctionBegin;
2654   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2655   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2656   PetscFunctionReturn(0);
2657 }
2658 
2659 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2660 {
2661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2662   PetscErrorCode ierr;
2663 
2664   PetscFunctionBegin;
2665   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2666   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2671 {
2672   Mat_MPIAIJ     *b;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2677   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2678   b = (Mat_MPIAIJ*)B->data;
2679 
2680 #if defined(PETSC_USE_CTABLE)
2681   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2682 #else
2683   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2684 #endif
2685   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2686   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2687   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2688 
2689   /* Because B may have been resized we simply destroy it and create a new one each time */
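  /* The new B is given the full global number of columns here; MatAssemblyEnd() later compresses
     its column space to just the columns that actually hold off-process nonzeros (tracked in garray). */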
2690   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2691   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2692   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2693   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2694   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2695   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2696 
2697   if (!B->preallocated) {
2698     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2699     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2700     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2701     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2702     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2703   }
2704 
2705   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2706   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2707   B->preallocated  = PETSC_TRUE;
2708   B->was_assembled = PETSC_FALSE;
2709   B->assembled     = PETSC_FALSE;
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2714 {
2715   Mat_MPIAIJ     *b;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2720   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2721   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2722   b = (Mat_MPIAIJ*)B->data;
2723 
2724 #if defined(PETSC_USE_CTABLE)
2725   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2726 #else
2727   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2728 #endif
2729   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2730   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2731   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2732 
2733   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2734   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2735   B->preallocated  = PETSC_TRUE;
2736   B->was_assembled = PETSC_FALSE;
2737   B->assembled = PETSC_FALSE;
2738   PetscFunctionReturn(0);
2739 }
2740 
2741 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2742 {
2743   Mat            mat;
2744   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   *newmat = 0;
2749   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2750   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2751   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2752   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2753   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2754   a       = (Mat_MPIAIJ*)mat->data;
2755 
2756   mat->factortype   = matin->factortype;
2757   mat->assembled    = PETSC_TRUE;
2758   mat->insertmode   = NOT_SET_VALUES;
2759   mat->preallocated = PETSC_TRUE;
2760 
2761   a->size         = oldmat->size;
2762   a->rank         = oldmat->rank;
2763   a->donotstash   = oldmat->donotstash;
2764   a->roworiented  = oldmat->roworiented;
2765   a->rowindices   = 0;
2766   a->rowvalues    = 0;
2767   a->getrowactive = PETSC_FALSE;
2768 
2769   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2770   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2771 
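  /* Deep-copy the auxiliary structures: the column map, the global-column array garray,
     the local work vector and the communication scatter(s). */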
2772   if (oldmat->colmap) {
2773 #if defined(PETSC_USE_CTABLE)
2774     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2775 #else
2776     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2777     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2778     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2779 #endif
2780   } else a->colmap = 0;
2781   if (oldmat->garray) {
2782     PetscInt len;
2783     len  = oldmat->B->cmap->n;
2784     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2785     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2786     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2787   } else a->garray = 0;
2788 
2789   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2790   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2791   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2792   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2793 
2794   if (oldmat->Mvctx_mpi1) {
2795     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2796     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2797   }
2798 
2799   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2800   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2801   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2802   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2803   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2804   *newmat = mat;
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2809 {
2810   PetscScalar    *vals,*svals;
2811   MPI_Comm       comm;
2812   PetscErrorCode ierr;
2813   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2814   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2815   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2816   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2817   PetscInt       cend,cstart,n,*rowners;
2818   int            fd;
2819   PetscInt       bs = newMat->rmap->bs;
2820 
2821   PetscFunctionBegin;
2822   /* force binary viewer to load .info file if it has not yet done so */
2823   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2824   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2825   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2826   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2827   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
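  /* Process 0 reads the header, row lengths, column indices and numerical values from the file
     and ships each other process its share; the remaining processes only receive their own pieces. */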
2828   if (!rank) {
2829     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2830     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2831     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2832   }
2833 
2834   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2835   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2836   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2837   if (bs < 0) bs = 1;
2838 
2839   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2840   M    = header[1]; N = header[2];
2841 
2842   /* If global sizes are set, check if they are consistent with that given in the file */
2843   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2844   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2845 
2846   /* determine ownership of all (block) rows */
2847   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2848   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2849   else m = newMat->rmap->n; /* Set by user */
2850 
2851   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2852   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2853 
2854   /* First process needs enough room for process with most rows */
2855   if (!rank) {
2856     mmax = rowners[1];
2857     for (i=2; i<=size; i++) {
2858       mmax = PetscMax(mmax, rowners[i]);
2859     }
2860   } else mmax = -1;             /* unused, but compilers complain */
2861 
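  /* turn the gathered per-process row counts into a running sum so that rowners[] holds
     the global starting row of each process */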
2862   rowners[0] = 0;
2863   for (i=2; i<=size; i++) {
2864     rowners[i] += rowners[i-1];
2865   }
2866   rstart = rowners[rank];
2867   rend   = rowners[rank+1];
2868 
2869   /* distribute row lengths to all processors */
2870   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2871   if (!rank) {
2872     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2873     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2874     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2875     for (j=0; j<m; j++) {
2876       procsnz[0] += ourlens[j];
2877     }
2878     for (i=1; i<size; i++) {
2879       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2880       /* calculate the number of nonzeros on each processor */
2881       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2882         procsnz[i] += rowlengths[j];
2883       }
2884       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2885     }
2886     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2887   } else {
2888     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2889   }
2890 
2891   if (!rank) {
2892     /* determine max buffer needed and allocate it */
2893     maxnz = 0;
2894     for (i=0; i<size; i++) {
2895       maxnz = PetscMax(maxnz,procsnz[i]);
2896     }
2897     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2898 
2899     /* read in my part of the matrix column indices  */
2900     nz   = procsnz[0];
2901     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2902     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2903 
2904     /* read in everyone else's and ship off */
2905     for (i=1; i<size; i++) {
2906       nz   = procsnz[i];
2907       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2908       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2909     }
2910     ierr = PetscFree(cols);CHKERRQ(ierr);
2911   } else {
2912     /* determine buffer space needed for message */
2913     nz = 0;
2914     for (i=0; i<m; i++) {
2915       nz += ourlens[i];
2916     }
2917     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2918 
2919     /* receive message of column indices*/
2920     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2921   }
2922 
2923   /* determine column ownership if matrix is not square */
2924   if (N != M) {
2925     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2926     else n = newMat->cmap->n;
2927     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2928     cstart = cend - n;
2929   } else {
2930     cstart = rstart;
2931     cend   = rend;
2932     n      = cend - cstart;
2933   }
2934 
2935   /* loop over local rows, determining number of off diagonal entries */
2936   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2937   jj   = 0;
2938   for (i=0; i<m; i++) {
2939     for (j=0; j<ourlens[i]; j++) {
2940       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2941       jj++;
2942     }
2943   }
2944 
2945   for (i=0; i<m; i++) {
2946     ourlens[i] -= offlens[i];
2947   }
2948   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2949 
2950   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2951 
2952   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2953 
2954   for (i=0; i<m; i++) {
2955     ourlens[i] += offlens[i];
2956   }
2957 
2958   if (!rank) {
2959     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2960 
2961     /* read in my part of the matrix numerical values  */
2962     nz   = procsnz[0];
2963     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2964 
2965     /* insert into matrix */
2966     jj      = rstart;
2967     smycols = mycols;
2968     svals   = vals;
2969     for (i=0; i<m; i++) {
2970       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2971       smycols += ourlens[i];
2972       svals   += ourlens[i];
2973       jj++;
2974     }
2975 
2976     /* read in other processors and ship out */
2977     for (i=1; i<size; i++) {
2978       nz   = procsnz[i];
2979       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2980       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2981     }
2982     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2983   } else {
2984     /* receive numeric values */
2985     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2986 
2987     /* receive message of values*/
2988     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2989 
2990     /* insert into matrix */
2991     jj      = rstart;
2992     smycols = mycols;
2993     svals   = vals;
2994     for (i=0; i<m; i++) {
2995       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2996       smycols += ourlens[i];
2997       svals   += ourlens[i];
2998       jj++;
2999     }
3000   }
3001   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3002   ierr = PetscFree(vals);CHKERRQ(ierr);
3003   ierr = PetscFree(mycols);CHKERRQ(ierr);
3004   ierr = PetscFree(rowners);CHKERRQ(ierr);
3005   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3006   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 /* Not scalable because of ISAllGather() unless getting all columns. */
3011 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3012 {
3013   PetscErrorCode ierr;
3014   IS             iscol_local;
3015   PetscBool      isstride;
3016   PetscMPIInt    lisstride=0,gisstride;
3017 
3018   PetscFunctionBegin;
3019   /* check if we are grabbing all columns*/
3020   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3021 
3022   if (isstride) {
3023     PetscInt  start,len,mstart,mlen;
3024     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3025     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3026     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3027     if (mstart == start && mlen-mstart == len) lisstride = 1;
3028   }
3029 
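  /* the reduction leaves gisstride set only if every process selected exactly its own
     column ownership range, i.e. collectively all columns of mat are requested */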
3030   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3031   if (gisstride) {
3032     PetscInt N;
3033     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3034     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3035     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3036     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3037   } else {
3038     PetscInt cbs;
3039     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3040     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3041     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3042   }
3043 
3044   *isseq = iscol_local;
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 /*
3049  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3050  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3051 
3052  Input Parameters:
3053    mat - matrix
3054    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3055            i.e., mat->rstart <= isrow[i] < mat->rend
3056    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3057            i.e., mat->cstart <= iscol[i] < mat->cend
3058  Output Parameter:
3059    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3060    iscol_o - sequential column index set for retrieving mat->B
3061    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3062  */
3063 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3064 {
3065   PetscErrorCode ierr;
3066   Vec            x,cmap;
3067   const PetscInt *is_idx;
3068   PetscScalar    *xarray,*cmaparray;
3069   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3070   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3071   Mat            B=a->B;
3072   Vec            lvec=a->lvec,lcmap;
3073   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3074   MPI_Comm       comm;
3075   VecScatter     Mvctx=a->Mvctx;
3076 
3077   PetscFunctionBegin;
3078   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3079   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3080 
3081   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3082   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3083   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3084   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3085   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3086 
3087   /* Get start indices */
3088   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3089   isstart -= ncols;
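  /* isstart is now the global offset of this process's entries within iscol */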
3090   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3091 
3092   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3093   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3094   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3095   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3096   for (i=0; i<ncols; i++) {
3097     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3098     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3099     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3100   }
3101   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3102   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3103   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3104 
3105   /* Get iscol_d */
3106   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3107   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3108   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3109 
3110   /* Get isrow_d */
3111   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3112   rstart = mat->rmap->rstart;
3113   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3114   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3115   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3116   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3117 
3118   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3119   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3120   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3121 
3122   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3123   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3124   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3125 
3126   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3127 
3128   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3129   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3130 
3131   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3132   /* off-process column indices */
3133   count = 0;
3134   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3135   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3136 
3137   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3138   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3139   for (i=0; i<Bn; i++) {
3140     if (PetscRealPart(xarray[i]) > -1.0) {
3141       idx[count]     = i;                   /* local column index in off-diagonal part B */
3142       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3143       count++;
3144     }
3145   }
3146   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3147   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3148 
3149   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3150   /* cannot ensure iscol_o has same blocksize as iscol! */
3151 
3152   ierr = PetscFree(idx);CHKERRQ(ierr);
3153   *garray = cmap1;
3154 
3155   ierr = VecDestroy(&x);CHKERRQ(ierr);
3156   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3157   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3162 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3163 {
3164   PetscErrorCode ierr;
3165   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3166   Mat            M = NULL;
3167   MPI_Comm       comm;
3168   IS             iscol_d,isrow_d,iscol_o;
3169   Mat            Asub = NULL,Bsub = NULL;
3170   PetscInt       n;
3171 
3172   PetscFunctionBegin;
3173   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3174 
3175   if (call == MAT_REUSE_MATRIX) {
3176     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3177     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3178     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3179 
3180     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3181     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3182 
3183     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3184     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3185 
3186     /* Update diagonal and off-diagonal portions of submat */
3187     asub = (Mat_MPIAIJ*)(*submat)->data;
3188     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3189     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3190     if (n) {
3191       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3192     }
3193     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3194     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3195 
3196   } else { /* call == MAT_INITIAL_MATRIX) */
3197     const PetscInt *garray;
3198     PetscInt        BsubN;
3199 
3200     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3201     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3202 
3203     /* Create local submatrices Asub and Bsub */
3204     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3205     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3206 
3207     /* Create submatrix M */
3208     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3209 
3210     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3211     asub = (Mat_MPIAIJ*)M->data;
3212 
3213     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3214     n = asub->B->cmap->N;
3215     if (BsubN > n) {
3216       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3217       const PetscInt *idx;
3218       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3219       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3220 
3221       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3222       j = 0;
3223       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3224       for (i=0; i<n; i++) {
3225         if (j >= BsubN) break;
3226         while (subgarray[i] > garray[j]) j++;
3227 
3228         if (subgarray[i] == garray[j]) {
3229           idx_new[i] = idx[j++];
3230         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3231       }
3232       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3233 
3234       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3235       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3236 
3237     } else if (BsubN < n) {
3238       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3239     }
3240 
3241     ierr = PetscFree(garray);CHKERRQ(ierr);
3242     *submat = M;
3243 
3244     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3245     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3246     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3247 
3248     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3249     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3250 
3251     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3252     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3253   }
3254   PetscFunctionReturn(0);
3255 }
3256 
3257 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3258 {
3259   PetscErrorCode ierr;
3260   IS             iscol_local=NULL,isrow_d;
3261   PetscInt       csize;
3262   PetscInt       n,i,j,start,end;
3263   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3264   MPI_Comm       comm;
3265 
3266   PetscFunctionBegin;
3267   /* If isrow has same processor distribution as mat,
3268      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3269   if (call == MAT_REUSE_MATRIX) {
3270     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3271     if (isrow_d) {
3272       sameRowDist  = PETSC_TRUE;
3273       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3274     } else {
3275       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3276       if (iscol_local) {
3277         sameRowDist  = PETSC_TRUE;
3278         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3279       }
3280     }
3281   } else {
3282     /* Check if isrow has same processor distribution as mat */
3283     sameDist[0] = PETSC_FALSE;
3284     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3285     if (!n) {
3286       sameDist[0] = PETSC_TRUE;
3287     } else {
3288       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3289       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3290       if (i >= start && j < end) {
3291         sameDist[0] = PETSC_TRUE;
3292       }
3293     }
3294 
3295     /* Check if iscol has same processor distribution as mat */
3296     sameDist[1] = PETSC_FALSE;
3297     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3298     if (!n) {
3299       sameDist[1] = PETSC_TRUE;
3300     } else {
3301       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3302       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3303       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3304     }
3305 
3306     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3307     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3308     sameRowDist = tsameDist[0];
3309   }
3310 
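  /* Three paths: (1) both isrow and iscol match mat's layout -> SameRowColDist();
     (2) only isrow matches (and iscol_local is sorted) -> SameRowDist();
     (3) otherwise fall through to the nonscalable ISAllGather()-based version below. */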
3311   if (sameRowDist) {
3312     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3313       /* isrow and iscol have same processor distribution as mat */
3314       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3315       PetscFunctionReturn(0);
3316     } else { /* sameRowDist */
3317       /* isrow has same processor distribution as mat */
3318       if (call == MAT_INITIAL_MATRIX) {
3319         PetscBool sorted;
3320         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3321         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3322         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3323         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3324 
3325         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3326         if (sorted) {
3327           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3328           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3329           PetscFunctionReturn(0);
3330         }
3331       } else { /* call == MAT_REUSE_MATRIX */
3332         IS    iscol_sub;
3333         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3334         if (iscol_sub) {
3335           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3336           PetscFunctionReturn(0);
3337         }
3338       }
3339     }
3340   }
3341 
3342   /* General case: iscol -> iscol_local which has global size of iscol */
3343   if (call == MAT_REUSE_MATRIX) {
3344     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3345     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3346   } else {
3347     if (!iscol_local) {
3348       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3349     }
3350   }
3351 
3352   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3353   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3354 
3355   if (call == MAT_INITIAL_MATRIX) {
3356     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3357     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3358   }
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 /*@C
3363      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3364          and "off-diagonal" part of the matrix in CSR format.
3365 
3366    Collective on MPI_Comm
3367 
3368    Input Parameters:
3369 +  comm - MPI communicator
3370 .  A - "diagonal" portion of matrix
3371 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3372 -  garray - global index of B columns
3373 
3374    Output Parameter:
3375 .   mat - the matrix, with input A as its local diagonal matrix

3376    Level: advanced
3377 
3378    Notes:
3379        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3380        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3381 
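   Example usage (a minimal sketch; it assumes A and B are assembled MATSEQAIJ matrices with the
   same number of local rows and that garray[] holds the global column index of each column of B):
.vb
   Mat            A,B,C;
   const PetscInt *garray;
   // ... build the sequential "diagonal" block A and "off-diagonal" block B on each process,
   //     and fill garray[k] with the global column of column k of B ...
   ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);
   // A and B now belong to C; the caller must not use or destroy them afterwards
.ve
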
3382 .seealso: MatCreateMPIAIJWithSplitArrays()
3383 @*/
3384 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3385 {
3386   PetscErrorCode ierr;
3387   Mat_MPIAIJ     *maij;
3388   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3389   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3390   PetscScalar    *oa=b->a;
3391   Mat            Bnew;
3392   PetscInt       m,n,N;
3393 
3394   PetscFunctionBegin;
3395   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3396   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3397   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3398   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3399   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3400   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3401 
3402   /* Get global columns of mat */
3403   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3404 
3405   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3406   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3407   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3408   maij = (Mat_MPIAIJ*)(*mat)->data;
3409 
3410   (*mat)->preallocated = PETSC_TRUE;
3411 
3412   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3413   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3414 
3415   /* Set A as diagonal portion of *mat */
3416   maij->A = A;
3417 
3418   nz = oi[m];
3419   for (i=0; i<nz; i++) {
3420     col   = oj[i];
3421     oj[i] = garray[col];
3422   }
3423 
3424   /* Set Bnew as off-diagonal portion of *mat */
3425   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3426   bnew        = (Mat_SeqAIJ*)Bnew->data;
3427   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3428   maij->B     = Bnew;
3429 
3430   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3431 
3432   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3433   b->free_a       = PETSC_FALSE;
3434   b->free_ij      = PETSC_FALSE;
3435   ierr = MatDestroy(&B);CHKERRQ(ierr);
3436 
3437   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3438   bnew->free_a       = PETSC_TRUE;
3439   bnew->free_ij      = PETSC_TRUE;
3440 
3441   /* condense columns of maij->B */
3442   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3443   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3444   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3445   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3446   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3447   PetscFunctionReturn(0);
3448 }
3449 
3450 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3451 
3452 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3453 {
3454   PetscErrorCode ierr;
3455   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3456   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3457   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3458   Mat            M,Msub,B=a->B;
3459   MatScalar      *aa;
3460   Mat_SeqAIJ     *aij;
3461   PetscInt       *garray = a->garray,*colsub,Ncols;
3462   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3463   IS             iscol_sub,iscmap;
3464   const PetscInt *is_idx,*cmap;
3465   PetscBool      allcolumns=PETSC_FALSE;
3466   MPI_Comm       comm;
3467 
3468   PetscFunctionBegin;
3469   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3470 
3471   if (call == MAT_REUSE_MATRIX) {
3472     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3473     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3474     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3475 
3476     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3477     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3478 
3479     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3480     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3481 
3482     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3483 
3484   } else { /* call == MAT_INITIAL_MATRIX) */
3485     PetscBool flg;
3486 
3487     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3488     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3489 
3490     /* (1) iscol -> nonscalable iscol_local */
3491     /* Check for special case: each processor gets entire matrix columns */
3492     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3493     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3494     if (allcolumns) {
3495       iscol_sub = iscol_local;
3496       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3497       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3498 
3499     } else {
3500       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3501       PetscInt *idx,*cmap1,k;
3502       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3503       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3504       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3505       count = 0;
3506       k     = 0;
3507       for (i=0; i<Ncols; i++) {
3508         j = is_idx[i];
3509         if (j >= cstart && j < cend) {
3510           /* diagonal part of mat */
3511           idx[count]     = j;
3512           cmap1[count++] = i; /* column index in submat */
3513         } else if (Bn) {
3514           /* off-diagonal part of mat */
3515           if (j == garray[k]) {
3516             idx[count]     = j;
3517             cmap1[count++] = i;  /* column index in submat */
3518           } else if (j > garray[k]) {
3519             while (j > garray[k] && k < Bn-1) k++;
3520             if (j == garray[k]) {
3521               idx[count]     = j;
3522               cmap1[count++] = i; /* column index in submat */
3523             }
3524           }
3525         }
3526       }
3527       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3528 
3529       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3530       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3531       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3532 
3533       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3534     }
3535 
3536     /* (3) Create sequential Msub */
3537     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3538   }
3539 
3540   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3541   aij  = (Mat_SeqAIJ*)(Msub)->data;
3542   ii   = aij->i;
3543   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3544 
3545   /*
3546       m - number of local rows
3547       Ncols - number of columns (same on all processors)
3548       rstart - first row in new global matrix generated
3549   */
3550   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3551 
3552   if (call == MAT_INITIAL_MATRIX) {
3553     /* (4) Create parallel newmat */
3554     PetscMPIInt    rank,size;
3555     PetscInt       csize;
3556 
3557     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3558     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3559 
3560     /*
3561         Determine the number of non-zeros in the diagonal and off-diagonal
3562         portions of the matrix in order to do correct preallocation
3563     */
3564 
3565     /* first get start and end of "diagonal" columns */
3566     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3567     if (csize == PETSC_DECIDE) {
3568       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3569       if (mglobal == Ncols) { /* square matrix */
3570         nlocal = m;
3571       } else {
3572         nlocal = Ncols/size + ((Ncols % size) > rank);
3573       }
3574     } else {
3575       nlocal = csize;
3576     }
3577     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3578     rstart = rend - nlocal;
3579     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3580 
3581     /* next, compute all the lengths */
3582     jj    = aij->j;
3583     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3584     olens = dlens + m;
3585     for (i=0; i<m; i++) {
3586       jend = ii[i+1] - ii[i];
3587       olen = 0;
3588       dlen = 0;
3589       for (j=0; j<jend; j++) {
3590         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3591         else dlen++;
3592         jj++;
3593       }
3594       olens[i] = olen;
3595       dlens[i] = dlen;
3596     }
3597 
3598     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3599     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3600 
3601     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3602     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3603     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3604     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3605     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3606     ierr = PetscFree(dlens);CHKERRQ(ierr);
3607 
3608   } else { /* call == MAT_REUSE_MATRIX */
3609     M    = *newmat;
3610     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3611     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3612     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3613     /*
3614          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3615        rather than the slower MatSetValues().
3616     */
3617     M->was_assembled = PETSC_TRUE;
3618     M->assembled     = PETSC_FALSE;
3619   }
3620 
3621   /* (5) Set values of Msub to *newmat */
3622   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3623   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3624 
3625   jj   = aij->j;
3626   aa   = aij->a;
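  /* remap Msub's local column indices to the submatrix's global column numbering via cmap */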
3627   for (i=0; i<m; i++) {
3628     row = rstart + i;
3629     nz  = ii[i+1] - ii[i];
3630     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3631     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3632     jj += nz; aa += nz;
3633   }
3634   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3635 
3636   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638 
3639   ierr = PetscFree(colsub);CHKERRQ(ierr);
3640 
3641   /* save Msub, iscol_sub and iscmap used in processor for next request */
3642   if (call ==  MAT_INITIAL_MATRIX) {
3643     *newmat = M;
3644     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3645     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3646 
3647     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3648     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3649 
3650     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3651     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3652 
3653     if (iscol_local) {
3654       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3655       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3656     }
3657   }
3658   PetscFunctionReturn(0);
3659 }
3660 
3661 /*
3662     Not great since it makes two copies of the submatrix: first a SeqAIJ
3663   locally, and then the end result by concatenating the local matrices.
3664   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3665 
3666   Note: This requires a sequential iscol with all indices.
3667 */
3668 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3669 {
3670   PetscErrorCode ierr;
3671   PetscMPIInt    rank,size;
3672   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3673   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3674   Mat            M,Mreuse;
3675   MatScalar      *aa,*vwork;
3676   MPI_Comm       comm;
3677   Mat_SeqAIJ     *aij;
3678   PetscBool      colflag,allcolumns=PETSC_FALSE;
3679 
3680   PetscFunctionBegin;
3681   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3682   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3683   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3684 
3685   /* Check for special case: each processor gets entire matrix columns */
3686   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3687   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3688   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3689 
3690   if (call ==  MAT_REUSE_MATRIX) {
3691     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3692     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3693     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3694   } else {
3695     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3696   }
3697 
3698   /*
3699       m - number of local rows
3700       n - number of columns (same on all processors)
3701       rstart - first row in new global matrix generated
3702   */
3703   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3704   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3705   if (call == MAT_INITIAL_MATRIX) {
3706     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3707     ii  = aij->i;
3708     jj  = aij->j;
3709 
3710     /*
3711         Determine the number of non-zeros in the diagonal and off-diagonal
3712         portions of the matrix in order to do correct preallocation
3713     */
3714 
3715     /* first get start and end of "diagonal" columns */
3716     if (csize == PETSC_DECIDE) {
3717       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3718       if (mglobal == n) { /* square matrix */
3719         nlocal = m;
3720       } else {
3721         nlocal = n/size + ((n % size) > rank);
3722       }
3723     } else {
3724       nlocal = csize;
3725     }
3726     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3727     rstart = rend - nlocal;
3728     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3729 
3730     /* next, compute all the lengths */
3731     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3732     olens = dlens + m;
3733     for (i=0; i<m; i++) {
3734       jend = ii[i+1] - ii[i];
3735       olen = 0;
3736       dlen = 0;
3737       for (j=0; j<jend; j++) {
3738         if (*jj < rstart || *jj >= rend) olen++;
3739         else dlen++;
3740         jj++;
3741       }
3742       olens[i] = olen;
3743       dlens[i] = dlen;
3744     }
3745     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3746     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3747     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3748     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3749     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3750     ierr = PetscFree(dlens);CHKERRQ(ierr);
3751   } else {
3752     PetscInt ml,nl;
3753 
3754     M    = *newmat;
3755     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3756     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3757     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3758     /*
3759          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3760        rather than the slower MatSetValues().
3761     */
3762     M->was_assembled = PETSC_TRUE;
3763     M->assembled     = PETSC_FALSE;
3764   }
3765   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3766   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3767   ii   = aij->i;
3768   jj   = aij->j;
3769   aa   = aij->a;
3770   for (i=0; i<m; i++) {
3771     row   = rstart + i;
3772     nz    = ii[i+1] - ii[i];
3773     cwork = jj;     jj += nz;
3774     vwork = aa;     aa += nz;
3775     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3776   }
3777 
3778   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3779   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3780   *newmat = M;
3781 
3782   /* save submatrix used in processor for next request */
3783   if (call ==  MAT_INITIAL_MATRIX) {
3784     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3785     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3786   }
3787   PetscFunctionReturn(0);
3788 }
3789 
3790 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3791 {
3792   PetscInt       m,cstart, cend,j,nnz,i,d;
3793   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3794   const PetscInt *JJ;
3795   PetscScalar    *values;
3796   PetscErrorCode ierr;
3797   PetscBool      nooffprocentries;
3798 
3799   PetscFunctionBegin;
3800   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3801 
3802   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3803   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3804   m      = B->rmap->n;
3805   cstart = B->cmap->rstart;
3806   cend   = B->cmap->rend;
3807   rstart = B->rmap->rstart;
3808 
3809   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3810 
3811 #if defined(PETSC_USE_DEBUG)
3812   for (i=0; i<m; i++) {
3813     nnz = Ii[i+1]- Ii[i];
3814     JJ  = J + Ii[i];
3815     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3816     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3817     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3818   }
3819 #endif
3820 
3821   for (i=0; i<m; i++) {
3822     nnz     = Ii[i+1]- Ii[i];
3823     JJ      = J + Ii[i];
3824     nnz_max = PetscMax(nnz_max,nnz);
3825     d       = 0;
3826     for (j=0; j<nnz; j++) {
3827       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3828     }
3829     d_nnz[i] = d;
3830     o_nnz[i] = nnz - d;
3831   }
3832   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3833   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3834 
3835   if (v) values = (PetscScalar*)v;
3836   else {
3837     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3838   }
3839 
3840   for (i=0; i<m; i++) {
3841     ii   = i + rstart;
3842     nnz  = Ii[i+1]- Ii[i];
3843     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3844   }
3845   nooffprocentries    = B->nooffprocentries;
3846   B->nooffprocentries = PETSC_TRUE;
3847   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3848   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3849   B->nooffprocentries = nooffprocentries;
3850 
3851   if (!v) {
3852     ierr = PetscFree(values);CHKERRQ(ierr);
3853   }
3854   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3855   PetscFunctionReturn(0);
3856 }
3857 
3858 /*@
3859    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3860    (the default parallel PETSc format).
3861 
3862    Collective on MPI_Comm
3863 
3864    Input Parameters:
3865 +  B - the matrix
3866 .  i - the indices into j for the start of each local row (starts with zero)
3867 .  j - the column indices for each local row (starts with zero)
3868 -  v - optional values in the matrix
3869 
3870    Level: developer
3871 
3872    Notes:
3873        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3874      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3875      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3876 
3877        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3878 
3879        The format used for the sparse matrix input is equivalent to a
3880     row-major ordering, i.e. for the following matrix, the expected input data is
3881     as shown below:
3882 
3883 $        1 0 0
3884 $        2 0 3     P0
3885 $       -------
3886 $        4 5 6     P1
3887 $
3888 $     Process0 [P0]: rows_owned=[0,1]
3889 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3890 $        j =  {0,0,2}  [size = 3]
3891 $        v =  {1,2,3}  [size = 3]
3892 $
3893 $     Process1 [P1]: rows_owned=[2]
3894 $        i =  {0,3}    [size = nrow+1  = 1+1]
3895 $        j =  {0,1,2}  [size = 3]
3896 $        v =  {4,5,6}  [size = 3]
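
     As a rough illustration, process 0 in the layout above could make the
     (collective) calls below, each process passing only its own local rows
     (a minimal sketch; the variable names are placeholders):
$
$     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$
$     MatCreate(PETSC_COMM_WORLD,&B);
$     MatSetSizes(B,2,PETSC_DECIDE,3,3);
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
$
     Process 1 would make the matching call with i = {0,3}, j = {0,1,2}, and v = {4,5,6}.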
3897 
3898 .keywords: matrix, aij, compressed row, sparse, parallel
3899 
3900 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3901           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3902 @*/
3903 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3904 {
3905   PetscErrorCode ierr;
3906 
3907   PetscFunctionBegin;
3908   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3909   PetscFunctionReturn(0);
3910 }
3911 
3912 /*@C
3913    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3914    (the default parallel PETSc format).  For good matrix assembly performance
3915    the user should preallocate the matrix storage by setting the parameters
3916    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3917    performance can be increased by more than a factor of 50.
3918 
3919    Collective on MPI_Comm
3920 
3921    Input Parameters:
3922 +  B - the matrix
3923 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3924            (same value is used for all local rows)
3925 .  d_nnz - array containing the number of nonzeros in the various rows of the
3926            DIAGONAL portion of the local submatrix (possibly different for each row)
3927            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3928            The size of this array is equal to the number of local rows, i.e. 'm'.
3929            For matrices that will be factored, you must leave room for (and set)
3930            the diagonal entry even if it is zero.
3931 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3932            submatrix (same value is used for all local rows).
3933 -  o_nnz - array containing the number of nonzeros in the various rows of the
3934            OFF-DIAGONAL portion of the local submatrix (possibly different for
3935            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3936            structure. The size of this array is equal to the number
3937            of local rows, i.e. 'm'.
3938 
3939    If the *_nnz parameter is given then the *_nz parameter is ignored
3940 
3941    The AIJ format (also called the Yale sparse matrix format or
3942    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3943    storage.  The stored row and column indices begin with zero.
3944    See Users-Manual: ch_mat for details.
3945 
3946    The parallel matrix is partitioned such that the first m0 rows belong to
3947    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3948    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3949 
3950    The DIAGONAL portion of the local submatrix of a processor can be defined
3951    as the submatrix which is obtained by extracting the part corresponding to
3952    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3953    first row that belongs to the processor, r2 is the last row belonging to
3954    this processor, and c1-c2 is the range of indices of the local part of a
3955    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3956    common case of a square matrix, the row and column ranges are the same and
3957    the DIAGONAL part is also square. The remaining portion of the local
3958    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3959 
3960    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3961 
3962    You can call MatGetInfo() to get information on how effective the preallocation was,
3963    for example via the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3964    You can also run with the option -info and look for messages with the string
3965    malloc in them to see if additional memory allocation was needed.
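
   For instance, a quick check after assembly could look roughly as follows (a sketch;
   A is assumed to be an assembled matrix):
.vb
     MatInfo info;

     MatGetInfo(A,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve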
3966 
3967    Example usage:
3968 
3969    Consider the following 8x8 matrix with 34 non-zero values that is
3970    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3971    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3972    as follows:
3973 
3974 .vb
3975             1  2  0  |  0  3  0  |  0  4
3976     Proc0   0  5  6  |  7  0  0  |  8  0
3977             9  0 10  | 11  0  0  | 12  0
3978     -------------------------------------
3979            13  0 14  | 15 16 17  |  0  0
3980     Proc1   0 18  0  | 19 20 21  |  0  0
3981             0  0  0  | 22 23  0  | 24  0
3982     -------------------------------------
3983     Proc2  25 26 27  |  0  0 28  | 29  0
3984            30  0  0  | 31 32 33  |  0 34
3985 .ve
3986 
3987    This can be represented as a collection of submatrices as:
3988 
3989 .vb
3990       A B C
3991       D E F
3992       G H I
3993 .ve
3994 
3995    Where the submatrices A,B,C are owned by proc0, D,E,F are
3996    owned by proc1, G,H,I are owned by proc2.
3997 
3998    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3999    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4000    The 'M','N' parameters are 8,8, and have the same values on all procs.
4001 
4002    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4003    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4004    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4005    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4006    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4007    matrix, and [DF] as another SeqAIJ matrix.
4008 
4009    When d_nz, o_nz parameters are specified, d_nz storage elements are
4010    allocated for every row of the local diagonal submatrix, and o_nz
4011    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4012    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4013    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4014    In this case, the values of d_nz,o_nz are:
4015 .vb
4016      proc0 : dnz = 2, o_nz = 2
4017      proc1 : dnz = 3, o_nz = 2
4018      proc2 : dnz = 1, o_nz = 4
4019 .ve
4020    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4021    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4022    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4023    34 values.
4024 
4025    When d_nnz, o_nnz parameters are specified, the storage is specified
4026    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4027    In the above case the values for d_nnz,o_nnz are:
4028 .vb
4029      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4030      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4031      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4032 .ve
4033    Here the space allocated is the sum of all the above values, i.e. 34, and
4034    hence the preallocation is perfect.
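
   As a rough sketch, proc1 in the example above could supply these per-row counts as
   follows (assuming the matrix B has already been created with its sizes and type set):
.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};

     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);  /* d_nz and o_nz are ignored when the arrays are given */
.ve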
4035 
4036    Level: intermediate
4037 
4038 .keywords: matrix, aij, compressed row, sparse, parallel
4039 
4040 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4041           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4042 @*/
4043 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4044 {
4045   PetscErrorCode ierr;
4046 
4047   PetscFunctionBegin;
4048   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4049   PetscValidType(B,1);
4050   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4051   PetscFunctionReturn(0);
4052 }
4053 
4054 /*@
4055      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4056          rows in standard CSR format.
4057 
4058    Collective on MPI_Comm
4059 
4060    Input Parameters:
4061 +  comm - MPI communicator
4062 .  m - number of local rows (Cannot be PETSC_DECIDE)
4063 .  n - This value should be the same as the local size used in creating the
4064        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4065        calculated if N is given). For square matrices n is almost always m.
4066 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4067 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4068 .   i - row indices
4069 .   j - column indices
4070 -   a - matrix values
4071 
4072    Output Parameter:
4073 .   mat - the matrix
4074 
4075    Level: intermediate
4076 
4077    Notes:
4078        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4079      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4080      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4081 
4082        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4083 
4084        The format used for the sparse matrix input is equivalent to a
4085     row-major ordering, i.e. for the following matrix, the expected input data is
4086     as shown below:
4087 
4088 $        1 0 0
4089 $        2 0 3     P0
4090 $       -------
4091 $        4 5 6     P1
4092 $
4093 $     Process0 [P0]: rows_owned=[0,1]
4094 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4095 $        j =  {0,0,2}  [size = 3]
4096 $        v =  {1,2,3}  [size = 3]
4097 $
4098 $     Process1 [P1]: rows_owned=[2]
4099 $        i =  {0,3}    [size = nrow+1  = 1+1]
4100 $        j =  {0,1,2}  [size = 3]
4101 $        v =  {4,5,6}  [size = 3]
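
     As a rough illustration, process 0 in the layout above could create the matrix with
     a single (collective) call, each process passing only its own local rows
     (a minimal sketch; the variable names are placeholders):
$
$     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$     Mat         A;
$
$     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
$
     Process 1 would make the matching call with m = 1 and its own i, j, v arrays.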
4102 
4103 .keywords: matrix, aij, compressed row, sparse, parallel
4104 
4105 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4106           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4107 @*/
4108 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4109 {
4110   PetscErrorCode ierr;
4111 
4112   PetscFunctionBegin;
4113   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4114   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4115   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4116   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4117   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4118   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4119   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4120   PetscFunctionReturn(0);
4121 }
4122 
4123 /*@C
4124    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4125    (the default parallel PETSc format).  For good matrix assembly performance
4126    the user should preallocate the matrix storage by setting the parameters
4127    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4128    performance can be increased by more than a factor of 50.
4129 
4130    Collective on MPI_Comm
4131 
4132    Input Parameters:
4133 +  comm - MPI communicator
4134 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4135            This value should be the same as the local size used in creating the
4136            y vector for the matrix-vector product y = Ax.
4137 .  n - This value should be the same as the local size used in creating the
4138        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4139        calculated if N is given). For square matrices n is almost always m.
4140 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4141 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4142 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4143            (same value is used for all local rows)
4144 .  d_nnz - array containing the number of nonzeros in the various rows of the
4145            DIAGONAL portion of the local submatrix (possibly different for each row)
4146            or NULL, if d_nz is used to specify the nonzero structure.
4147            The size of this array is equal to the number of local rows, i.e 'm'.
4148 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4149            submatrix (same value is used for all local rows).
4150 -  o_nnz - array containing the number of nonzeros in the various rows of the
4151            OFF-DIAGONAL portion of the local submatrix (possibly different for
4152            each row) or NULL, if o_nz is used to specify the nonzero
4153            structure. The size of this array is equal to the number
4154            of local rows, i.e 'm'.
4155 
4156    Output Parameter:
4157 .  A - the matrix
4158 
4159    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4160    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4161    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4162 
4163    Notes:
4164    If the *_nnz parameter is given then the *_nz parameter is ignored
4165 
4166    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4167    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4168    storage requirements for this matrix.
4169 
4170    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4171    processor then it must be used on all processors that share the object for
4172    that argument.
4173 
4174    The user MUST specify either the local or global matrix dimensions
4175    (possibly both).
4176 
4177    The parallel matrix is partitioned across processors such that the
4178    first m0 rows belong to process 0, the next m1 rows belong to
4179    process 1, the next m2 rows belong to process 2, etc., where
4180    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4181    values corresponding to an [m x N] submatrix.
4182 
4183    The columns are logically partitioned with the n0 columns belonging
4184    to the 0th partition, the next n1 columns belonging to the next
4185    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4186 
4187    The DIAGONAL portion of the local submatrix on any given processor
4188    is the submatrix corresponding to the m rows and n columns
4189    associated with the given processor, i.e. the diagonal matrix on
4190    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4191    etc. The remaining portion of the local submatrix [m x (N-n)]
4192    constitutes the OFF-DIAGONAL portion. The example below better
4193    illustrates this concept.
4194 
4195    For a square global matrix we define each processor's diagonal portion
4196    to be its local rows and the corresponding columns (a square submatrix);
4197    each processor's off-diagonal portion encompasses the remainder of the
4198    local matrix (a rectangular submatrix).
4199 
4200    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4201 
4202    When calling this routine with a single process communicator, a matrix of
4203    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4204    type of communicator, use the construction mechanism
4205 .vb
4206      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4207 .ve
4208 
4214    By default, this format uses inodes (identical nodes) when possible.
4215    We search for consecutive rows with the same nonzero structure, thereby
4216    reusing matrix information to achieve increased efficiency.
4217 
4218    Options Database Keys:
4219 +  -mat_no_inode  - Do not use inodes
4220 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4221 
4222 
4223 
4224    Example usage:
4225 
4226    Consider the following 8x8 matrix with 34 non-zero values that is
4227    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4228    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4229    as follows:
4230 
4231 .vb
4232             1  2  0  |  0  3  0  |  0  4
4233     Proc0   0  5  6  |  7  0  0  |  8  0
4234             9  0 10  | 11  0  0  | 12  0
4235     -------------------------------------
4236            13  0 14  | 15 16 17  |  0  0
4237     Proc1   0 18  0  | 19 20 21  |  0  0
4238             0  0  0  | 22 23  0  | 24  0
4239     -------------------------------------
4240     Proc2  25 26 27  |  0  0 28  | 29  0
4241            30  0  0  | 31 32 33  |  0 34
4242 .ve
4243 
4244    This can be represented as a collection of submatrices as
4245 
4246 .vb
4247       A B C
4248       D E F
4249       G H I
4250 .ve
4251 
4252    Where the submatrices A,B,C are owned by proc0, D,E,F are
4253    owned by proc1, G,H,I are owned by proc2.
4254 
4255    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4256    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4257    The 'M','N' parameters are 8,8, and have the same values on all procs.
4258 
4259    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4260    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4261    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4262    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4263    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4264    matrix, and [DF] as another SeqAIJ matrix.
4265 
4266    When d_nz, o_nz parameters are specified, d_nz storage elements are
4267    allocated for every row of the local diagonal submatrix, and o_nz
4268    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4269    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4270    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4271    In this case, the values of d_nz,o_nz are
4272 .vb
4273      proc0 : dnz = 2, o_nz = 2
4274      proc1 : dnz = 3, o_nz = 2
4275      proc2 : dnz = 1, o_nz = 4
4276 .ve
4277    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4278    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4279    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4280    34 values.
4281 
4282    When d_nnz, o_nnz parameters are specified, the storage is specified
4283    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4284    In the above case the values for d_nnz,o_nnz are
4285 .vb
4286      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4287      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4288      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4289 .ve
4290    Here the space allocated is the sum of all the above values, i.e. 34, and
4291    hence the preallocation is perfect.
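
   As a rough sketch, proc1 in the example above could then create the matrix in one
   call with these per-row counts (the variable names are placeholders):
.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     Mat      A;

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   The other processes make the matching collective call with their own local sizes
   and per-row counts.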
4292 
4293    Level: intermediate
4294 
4295 .keywords: matrix, aij, compressed row, sparse, parallel
4296 
4297 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4298           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4299 @*/
4300 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4301 {
4302   PetscErrorCode ierr;
4303   PetscMPIInt    size;
4304 
4305   PetscFunctionBegin;
4306   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4307   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4308   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4309   if (size > 1) {
4310     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4311     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4312   } else {
4313     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4314     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4315   }
4316   PetscFunctionReturn(0);
4317 }
4318 
4319 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4320 {
4321   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4322   PetscBool      flg;
4323   PetscErrorCode ierr;
4324 
4325   PetscFunctionBegin;
4326   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4327   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4328   if (Ad)     *Ad     = a->A;
4329   if (Ao)     *Ao     = a->B;
4330   if (colmap) *colmap = a->garray;
4331   PetscFunctionReturn(0);
4332 }
4333 
4334 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4335 {
4336   PetscErrorCode ierr;
4337   PetscInt       m,N,i,rstart,nnz,Ii;
4338   PetscInt       *indx;
4339   PetscScalar    *values;
4340 
4341   PetscFunctionBegin;
4342   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4343   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4344     PetscInt       *dnz,*onz,sum,bs,cbs;
4345 
4346     if (n == PETSC_DECIDE) {
4347       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4348     }
4349     /* Check sum(n) = N */
4350     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4351     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4352 
4353     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4354     rstart -= m;
4355 
4356     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4357     for (i=0; i<m; i++) {
4358       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4359       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4360       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4361     }
4362 
4363     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4364     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4365     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4366     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4367     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4368     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4369     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4370     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4371   }
4372 
4373   /* numeric phase */
4374   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4375   for (i=0; i<m; i++) {
4376     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4377     Ii   = i + rstart;
4378     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4379     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4380   }
4381   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4382   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   PetscFunctionReturn(0);
4384 }
4385 
4386 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4387 {
4388   PetscErrorCode    ierr;
4389   PetscMPIInt       rank;
4390   PetscInt          m,N,i,rstart,nnz;
4391   size_t            len;
4392   const PetscInt    *indx;
4393   PetscViewer       out;
4394   char              *name;
4395   Mat               B;
4396   const PetscScalar *values;
4397 
4398   PetscFunctionBegin;
4399   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4400   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4401   /* Should this be the type of the diagonal block of A? */
4402   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4403   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4404   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4405   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4406   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4407   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4408   for (i=0; i<m; i++) {
4409     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4410     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4411     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4412   }
4413   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4414   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4415 
4416   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4417   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4418   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4419   sprintf(name,"%s.%d",outfile,rank);
4420   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4421   ierr = PetscFree(name);CHKERRQ(ierr);
4422   ierr = MatView(B,out);CHKERRQ(ierr);
4423   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4424   ierr = MatDestroy(&B);CHKERRQ(ierr);
4425   PetscFunctionReturn(0);
4426 }
4427 
4428 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4429 {
4430   PetscErrorCode      ierr;
4431   Mat_Merge_SeqsToMPI *merge;
4432   PetscContainer      container;
4433 
4434   PetscFunctionBegin;
4435   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4436   if (container) {
4437     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4445     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4446     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4447     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4448     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4449     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4450     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4451     ierr = PetscFree(merge);CHKERRQ(ierr);
4452     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4453   }
4454   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4455   PetscFunctionReturn(0);
4456 }
4457 
4458 #include <../src/mat/utils/freespace.h>
4459 #include <petscbt.h>
4460 
4461 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4462 {
4463   PetscErrorCode      ierr;
4464   MPI_Comm            comm;
4465   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4466   PetscMPIInt         size,rank,taga,*len_s;
4467   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4468   PetscInt            proc,m;
4469   PetscInt            **buf_ri,**buf_rj;
4470   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4471   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4472   MPI_Request         *s_waits,*r_waits;
4473   MPI_Status          *status;
4474   MatScalar           *aa=a->a;
4475   MatScalar           **abuf_r,*ba_i;
4476   Mat_Merge_SeqsToMPI *merge;
4477   PetscContainer      container;
4478 
4479   PetscFunctionBegin;
4480   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4481   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4482 
4483   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4484   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4485 
4486   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4487   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4488 
4489   bi     = merge->bi;
4490   bj     = merge->bj;
4491   buf_ri = merge->buf_ri;
4492   buf_rj = merge->buf_rj;
4493 
4494   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4495   owners = merge->rowmap->range;
4496   len_s  = merge->len_s;
4497 
4498   /* send and recv matrix values */
4499   /*-----------------------------*/
4500   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4501   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4502 
4503   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4504   for (proc=0,k=0; proc<size; proc++) {
4505     if (!len_s[proc]) continue;
4506     i    = owners[proc];
4507     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4508     k++;
4509   }
4510 
4511   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4512   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4513   ierr = PetscFree(status);CHKERRQ(ierr);
4514 
4515   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4516   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4517 
4518   /* insert mat values of mpimat */
4519   /*----------------------------*/
4520   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4521   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4522 
4523   for (k=0; k<merge->nrecv; k++) {
4524     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4525     nrows       = *(buf_ri_k[k]);
4526     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4527     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4528   }
4529 
4530   /* set values of ba */
4531   m = merge->rowmap->n;
4532   for (i=0; i<m; i++) {
4533     arow = owners[rank] + i;
4534     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4535     bnzi = bi[i+1] - bi[i];
4536     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4537 
4538     /* add local non-zero vals of this proc's seqmat into ba */
4539     anzi   = ai[arow+1] - ai[arow];
4540     aj     = a->j + ai[arow];
4541     aa     = a->a + ai[arow];
4542     nextaj = 0;
4543     for (j=0; nextaj<anzi; j++) {
4544       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4545         ba_i[j] += aa[nextaj++];
4546       }
4547     }
4548 
4549     /* add received vals into ba */
4550     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4551       /* i-th row */
4552       if (i == *nextrow[k]) {
4553         anzi   = *(nextai[k]+1) - *nextai[k];
4554         aj     = buf_rj[k] + *(nextai[k]);
4555         aa     = abuf_r[k] + *(nextai[k]);
4556         nextaj = 0;
4557         for (j=0; nextaj<anzi; j++) {
4558           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4559             ba_i[j] += aa[nextaj++];
4560           }
4561         }
4562         nextrow[k]++; nextai[k]++;
4563       }
4564     }
4565     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4566   }
4567   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4568   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4569 
4570   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4571   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4572   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4573   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4574   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4575   PetscFunctionReturn(0);
4576 }
4577 
4578 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4579 {
4580   PetscErrorCode      ierr;
4581   Mat                 B_mpi;
4582   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4583   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4584   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4585   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4586   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4587   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4588   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4589   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4590   MPI_Status          *status;
4591   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4592   PetscBT             lnkbt;
4593   Mat_Merge_SeqsToMPI *merge;
4594   PetscContainer      container;
4595 
4596   PetscFunctionBegin;
4597   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4598 
4599   /* make sure it is a PETSc comm */
4600   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4601   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4602   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4603 
4604   ierr = PetscNew(&merge);CHKERRQ(ierr);
4605   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4606 
4607   /* determine row ownership */
4608   /*---------------------------------------------------------*/
4609   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4610   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4611   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4612   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4613   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4614   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4615   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4616 
4617   m      = merge->rowmap->n;
4618   owners = merge->rowmap->range;
4619 
4620   /* determine the number of messages to send, their lengths */
4621   /*---------------------------------------------------------*/
4622   len_s = merge->len_s;
4623 
4624   len          = 0; /* length of buf_si[] */
4625   merge->nsend = 0;
4626   for (proc=0; proc<size; proc++) {
4627     len_si[proc] = 0;
4628     if (proc == rank) {
4629       len_s[proc] = 0;
4630     } else {
4631       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4632       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4633     }
4634     if (len_s[proc]) {
4635       merge->nsend++;
4636       nrows = 0;
4637       for (i=owners[proc]; i<owners[proc+1]; i++) {
4638         if (ai[i+1] > ai[i]) nrows++;
4639       }
4640       len_si[proc] = 2*(nrows+1);
4641       len         += len_si[proc];
4642     }
4643   }
4644 
4645   /* determine the number and length of messages to receive for ij-structure */
4646   /*-------------------------------------------------------------------------*/
4647   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4648   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4649 
4650   /* post the Irecv of j-structure */
4651   /*-------------------------------*/
4652   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4653   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4654 
4655   /* post the Isend of j-structure */
4656   /*--------------------------------*/
4657   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4658 
4659   for (proc=0, k=0; proc<size; proc++) {
4660     if (!len_s[proc]) continue;
4661     i    = owners[proc];
4662     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4663     k++;
4664   }
4665 
4666   /* receives and sends of j-structure are complete */
4667   /*------------------------------------------------*/
4668   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4669   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4670 
4671   /* send and recv i-structure */
4672   /*---------------------------*/
4673   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4674   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4675 
4676   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4677   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4678   for (proc=0,k=0; proc<size; proc++) {
4679     if (!len_s[proc]) continue;
4680     /* form outgoing message for i-structure:
4681          buf_si[0]:                 nrows to be sent
4682                [1:nrows]:           row index (global)
4683                [nrows+1:2*nrows+1]: i-structure index
4684     */
4685     /*-------------------------------------------*/
4686     nrows       = len_si[proc]/2 - 1;
4687     buf_si_i    = buf_si + nrows+1;
4688     buf_si[0]   = nrows;
4689     buf_si_i[0] = 0;
4690     nrows       = 0;
4691     for (i=owners[proc]; i<owners[proc+1]; i++) {
4692       anzi = ai[i+1] - ai[i];
4693       if (anzi) {
4694         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4695         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4696         nrows++;
4697       }
4698     }
4699     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4700     k++;
4701     buf_si += len_si[proc];
4702   }
4703 
4704   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4705   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4706 
4707   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4708   for (i=0; i<merge->nrecv; i++) {
4709     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4710   }
4711 
4712   ierr = PetscFree(len_si);CHKERRQ(ierr);
4713   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4714   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4715   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4716   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4717   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4718   ierr = PetscFree(status);CHKERRQ(ierr);
4719 
4720   /* compute a local seq matrix in each processor */
4721   /*----------------------------------------------*/
4722   /* allocate bi array and free space for accumulating nonzero column info */
4723   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4724   bi[0] = 0;
4725 
4726   /* create and initialize a linked list */
4727   nlnk = N+1;
4728   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4729 
4730   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4731   len  = ai[owners[rank+1]] - ai[owners[rank]];
4732   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4733 
4734   current_space = free_space;
4735 
4736   /* determine symbolic info for each local row */
4737   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4738 
4739   for (k=0; k<merge->nrecv; k++) {
4740     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4741     nrows       = *buf_ri_k[k];
4742     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4743     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4744   }
4745 
4746   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4747   len  = 0;
4748   for (i=0; i<m; i++) {
4749     bnzi = 0;
4750     /* add local non-zero cols of this proc's seqmat into lnk */
4751     arow  = owners[rank] + i;
4752     anzi  = ai[arow+1] - ai[arow];
4753     aj    = a->j + ai[arow];
4754     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4755     bnzi += nlnk;
4756     /* add received col data into lnk */
4757     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4758       if (i == *nextrow[k]) { /* i-th row */
4759         anzi  = *(nextai[k]+1) - *nextai[k];
4760         aj    = buf_rj[k] + *nextai[k];
4761         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4762         bnzi += nlnk;
4763         nextrow[k]++; nextai[k]++;
4764       }
4765     }
4766     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4767 
4768     /* if free space is not available, make more free space */
4769     if (current_space->local_remaining<bnzi) {
4770       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4771       nspacedouble++;
4772     }
4773     /* copy data into free space, then initialize lnk */
4774     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4775     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4776 
4777     current_space->array           += bnzi;
4778     current_space->local_used      += bnzi;
4779     current_space->local_remaining -= bnzi;
4780 
4781     bi[i+1] = bi[i] + bnzi;
4782   }
4783 
4784   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4785 
4786   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4787   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4788   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4789 
4790   /* create symbolic parallel matrix B_mpi */
4791   /*---------------------------------------*/
4792   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4793   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4794   if (n==PETSC_DECIDE) {
4795     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4796   } else {
4797     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4798   }
4799   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4800   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4801   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4802   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4803   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4804 
4805   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4806   B_mpi->assembled    = PETSC_FALSE;
4807   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4808   merge->bi           = bi;
4809   merge->bj           = bj;
4810   merge->buf_ri       = buf_ri;
4811   merge->buf_rj       = buf_rj;
4812   merge->coi          = NULL;
4813   merge->coj          = NULL;
4814   merge->owners_co    = NULL;
4815 
4816   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4817 
4818   /* attach the supporting struct to B_mpi for reuse */
4819   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4820   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4821   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4822   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4823   *mpimat = B_mpi;
4824 
4825   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4826   PetscFunctionReturn(0);
4827 }
4828 
4829 /*@C
4830       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4831                  matrices from each processor
4832 
4833     Collective on MPI_Comm
4834 
4835    Input Parameters:
4836 +    comm - the communicator on which the parallel matrix will live
4837 .    seqmat - the input sequential matrix (one per process)
4838 .    m - number of local rows (or PETSC_DECIDE)
4839 .    n - number of local columns (or PETSC_DECIDE)
4840 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4841 
4842    Output Parameter:
4843 .    mpimat - the parallel matrix generated
4844 
4845     Level: advanced
4846 
4847    Notes:
4848      The dimensions of the sequential matrix in each processor MUST be the same.
4849      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4850      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
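
     A rough usage sketch (assuming each process has built its own SeqAIJ contribution
     seqmat with identical global dimensions):
.vb
     Mat mpimat;

     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve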
4851 @*/
4852 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4853 {
4854   PetscErrorCode ierr;
4855   PetscMPIInt    size;
4856 
4857   PetscFunctionBegin;
4858   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4859   if (size == 1) {
4860     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4861     if (scall == MAT_INITIAL_MATRIX) {
4862       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4863     } else {
4864       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4865     }
4866     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4867     PetscFunctionReturn(0);
4868   }
4869   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4870   if (scall == MAT_INITIAL_MATRIX) {
4871     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4872   }
4873   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4874   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4875   PetscFunctionReturn(0);
4876 }
4877 
4878 /*@
4879      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4880           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4881           with MatGetSize().
4882 
4883     Not Collective
4884 
4885    Input Parameters:
4886 +    A - the matrix
4887 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4888 
4889    Output Parameter:
4890 .    A_loc - the local sequential matrix generated
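
     A rough usage sketch (A is assumed to be an assembled MATMPIAIJ matrix; with
     MAT_INITIAL_MATRIX the caller is assumed to destroy A_loc when done):
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     MatDestroy(&A_loc);
.ve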
4891 
4892     Level: developer
4893 
4894 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4895 
4896 @*/
4897 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4898 {
4899   PetscErrorCode ierr;
4900   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4901   Mat_SeqAIJ     *mat,*a,*b;
4902   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4903   MatScalar      *aa,*ba,*cam;
4904   PetscScalar    *ca;
4905   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4906   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4907   PetscBool      match;
4908   MPI_Comm       comm;
4909   PetscMPIInt    size;
4910 
4911   PetscFunctionBegin;
4912   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4913   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4914   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4915   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4916   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4917 
4918   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4919   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4920   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4921   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4922   aa = a->a; ba = b->a;
4923   if (scall == MAT_INITIAL_MATRIX) {
4924     if (size == 1) {
4925       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4926       PetscFunctionReturn(0);
4927     }
4928 
4929     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4930     ci[0] = 0;
4931     for (i=0; i<am; i++) {
4932       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4933     }
4934     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4935     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4936     k    = 0;
4937     for (i=0; i<am; i++) {
4938       ncols_o = bi[i+1] - bi[i];
4939       ncols_d = ai[i+1] - ai[i];
4940       /* off-diagonal portion of A */
4941       for (jo=0; jo<ncols_o; jo++) {
4942         col = cmap[*bj];
4943         if (col >= cstart) break;
4944         cj[k]   = col; bj++;
4945         ca[k++] = *ba++;
4946       }
4947       /* diagonal portion of A */
4948       for (j=0; j<ncols_d; j++) {
4949         cj[k]   = cstart + *aj++;
4950         ca[k++] = *aa++;
4951       }
4952       /* off-diagonal portion of A */
4953       for (j=jo; j<ncols_o; j++) {
4954         cj[k]   = cmap[*bj++];
4955         ca[k++] = *ba++;
4956       }
4957     }
4958     /* put together the new matrix */
4959     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4960     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4961     /* Since these are PETSc arrays, change flags to free them as necessary. */
4962     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4963     mat->free_a  = PETSC_TRUE;
4964     mat->free_ij = PETSC_TRUE;
4965     mat->nonew   = 0;
4966   } else if (scall == MAT_REUSE_MATRIX) {
4967     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4968     ci = mat->i; cj = mat->j; cam = mat->a;
4969     for (i=0; i<am; i++) {
4970       /* off-diagonal portion of A */
4971       ncols_o = bi[i+1] - bi[i];
4972       for (jo=0; jo<ncols_o; jo++) {
4973         col = cmap[*bj];
4974         if (col >= cstart) break;
4975         *cam++ = *ba++; bj++;
4976       }
4977       /* diagonal portion of A */
4978       ncols_d = ai[i+1] - ai[i];
4979       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4980       /* off-diagonal portion of A */
4981       for (j=jo; j<ncols_o; j++) {
4982         *cam++ = *ba++; bj++;
4983       }
4984     }
4985   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4986   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4987   PetscFunctionReturn(0);
4988 }
4989 
4990 /*@C
4991      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4992 
4993     Not Collective
4994 
4995    Input Parameters:
4996 +    A - the matrix
4997 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4998 -    row, col - index sets of rows and columns to extract (or NULL)
4999 
5000    Output Parameter:
5001 .    A_loc - the local sequential matrix generated
5002 
5003     Level: developer
5004 
5005 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5006 
5007 @*/
5008 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5009 {
5010   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5011   PetscErrorCode ierr;
5012   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5013   IS             isrowa,iscola;
5014   Mat            *aloc;
5015   PetscBool      match;
5016 
5017   PetscFunctionBegin;
5018   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5019   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5020   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5021   if (!row) {
5022     start = A->rmap->rstart; end = A->rmap->rend;
5023     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5024   } else {
5025     isrowa = *row;
5026   }
5027   if (!col) {
5028     start = A->cmap->rstart;
5029     cmap  = a->garray;
5030     nzA   = a->A->cmap->n;
5031     nzB   = a->B->cmap->n;
5032     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5033     ncols = 0;
5034     for (i=0; i<nzB; i++) {
5035       if (cmap[i] < start) idx[ncols++] = cmap[i];
5036       else break;
5037     }
5038     imark = i;
5039     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5040     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5041     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5042   } else {
5043     iscola = *col;
5044   }
5045   if (scall != MAT_INITIAL_MATRIX) {
5046     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5047     aloc[0] = *A_loc;
5048   }
5049   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5050   *A_loc = aloc[0];
5051   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5052   if (!row) {
5053     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5054   }
5055   if (!col) {
5056     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5057   }
5058   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5059   PetscFunctionReturn(0);
5060 }
5061 
5062 /*@C
5063     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5064 
5065     Collective on Mat
5066 
5067    Input Parameters:
5068 +    A,B - the matrices in mpiaij format
5069 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5070 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5071 
5072    Output Parameter:
5073 +    rowb, colb - index sets of rows and columns of B to extract
5074 -    B_seq - the sequential matrix generated
5075 
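   Example usage (a minimal sketch; assumes A and B are assembled MATMPIAIJ matrices with A's column layout matching B's row layout):

      IS  rowb = NULL,colb = NULL;
      Mat B_seq = NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      /* ... if B later changes numerically but not structurally ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
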
5076     Level: developer
5077 
5078 @*/
5079 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5080 {
5081   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5082   PetscErrorCode ierr;
5083   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5084   IS             isrowb,iscolb;
5085   Mat            *bseq=NULL;
5086 
5087   PetscFunctionBegin;
5088   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5089     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5090   }
5091   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5092 
5093   if (scall == MAT_INITIAL_MATRIX) {
5094     start = A->cmap->rstart;
5095     cmap  = a->garray;
5096     nzA   = a->A->cmap->n;
5097     nzB   = a->B->cmap->n;
5098     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5099     ncols = 0;
5100     for (i=0; i<nzB; i++) {  /* row < local row index */
5101       if (cmap[i] < start) idx[ncols++] = cmap[i];
5102       else break;
5103     }
5104     imark = i;
5105     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5106     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5107     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5108     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5109   } else {
5110     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5111     isrowb  = *rowb; iscolb = *colb;
5112     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5113     bseq[0] = *B_seq;
5114   }
5115   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5116   *B_seq = bseq[0];
5117   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5118   if (!rowb) {
5119     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5120   } else {
5121     *rowb = isrowb;
5122   }
5123   if (!colb) {
5124     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5125   } else {
5126     *colb = iscolb;
5127   }
5128   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5129   PetscFunctionReturn(0);
5130 }
5131 
5132 /*
5133     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5134     of the OFF-DIAGONAL portion of local A
5135 
5136     Collective on Mat
5137 
5138    Input Parameters:
5139 +    A,B - the matrices in mpiaij format
5140 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5141 
5142    Output Parameters:
5143 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5144 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5145 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5146 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5147 
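   Example calling sequence (a minimal sketch; assumes A and B are assembled MATMPIAIJ matrices, and that the buffers were allocated by this routine with PetscMalloc2()/PetscMalloc1() as below):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... when only B's numerical values change, reuse the saved buffers ... */
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
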
5148     Level: developer
5149 
5150 */
5151 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5152 {
5153   VecScatter_MPI_General *gen_to,*gen_from;
5154   PetscErrorCode         ierr;
5155   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5156   Mat_SeqAIJ             *b_oth;
5157   VecScatter             ctx;
5158   MPI_Comm               comm;
5159   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5160   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5161   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5162   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5163   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5164   MPI_Request            *rwaits = NULL,*swaits = NULL;
5165   MPI_Status             *sstatus,rstatus;
5166   PetscMPIInt            jj,size;
5167   VecScatterType         type;
5168   PetscBool              mpi1;
5169 
5170   PetscFunctionBegin;
5171   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5172   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5173 
5174   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5175     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5176   }
5177   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5178   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5179 
5180   if (size == 1) {
5181     if (startsj_s) *startsj_s = NULL;
5182     if (bufa_ptr)  *bufa_ptr  = NULL;
5183     *B_oth    = NULL;
5184     PetscFunctionReturn(0);
5185   }
5186 
5187   ctx = a->Mvctx;
5188   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5189   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5190   if (!mpi1) {
5191     /* a->Mvctx is not of type MPI1, the only VecScatter type supported by the Mat-Mat ops,
5192        so create a->Mvctx_mpi1 */
5193     if (!a->Mvctx_mpi1) {
5194       a->Mvctx_mpi1_flg = PETSC_TRUE;
5195       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5196     }
5197     ctx = a->Mvctx_mpi1;
5198   }
5199   tag = ((PetscObject)ctx)->tag;
5200 
5201   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5202   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5203   nrecvs   = gen_from->n;
5204   nsends   = gen_to->n;
5205 
5206   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5207   srow    = gen_to->indices;    /* local row index to be sent */
5208   sstarts = gen_to->starts;
5209   sprocs  = gen_to->procs;
5210   sstatus = gen_to->sstatus;
5211   sbs     = gen_to->bs;
5212   rstarts = gen_from->starts;
5213   rprocs  = gen_from->procs;
5214   rbs     = gen_from->bs;
5215 
5216   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5217   if (scall == MAT_INITIAL_MATRIX) {
5218     /* i-array */
5219     /*---------*/
5220     /*  post receives */
5221     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5222     for (i=0; i<nrecvs; i++) {
5223       rowlen = rvalues + rstarts[i]*rbs;
5224       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5225       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5226     }
5227 
5228     /* pack the outgoing message */
5229     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5230 
5231     sstartsj[0] = 0;
5232     rstartsj[0] = 0;
5233     len         = 0; /* total length of j or a array to be sent */
5234     k           = 0;
5235     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5236     for (i=0; i<nsends; i++) {
5237       rowlen = svalues + sstarts[i]*sbs;
5238       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5239       for (j=0; j<nrows; j++) {
5240         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5241         for (l=0; l<sbs; l++) {
5242           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5243 
5244           rowlen[j*sbs+l] = ncols;
5245 
5246           len += ncols;
5247           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5248         }
5249         k++;
5250       }
5251       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5252 
5253       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5254     }
5255     /* recvs and sends of i-array are completed */
5256     i = nrecvs;
5257     while (i--) {
5258       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5259     }
5260     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5261     ierr = PetscFree(svalues);CHKERRQ(ierr);
5262 
5263     /* allocate buffers for sending j and a arrays */
5264     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5265     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5266 
5267     /* create i-array of B_oth */
5268     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5269 
5270     b_othi[0] = 0;
5271     len       = 0; /* total length of j or a array to be received */
5272     k         = 0;
5273     for (i=0; i<nrecvs; i++) {
5274       rowlen = rvalues + rstarts[i]*rbs;
5275       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5276       for (j=0; j<nrows; j++) {
5277         b_othi[k+1] = b_othi[k] + rowlen[j];
5278         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5279         k++;
5280       }
5281       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5282     }
5283     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5284 
5285     /* allocate space for j and a arrays of B_oth */
5286     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5287     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5288 
5289     /* j-array */
5290     /*---------*/
5291     /*  post receives of j-array */
5292     for (i=0; i<nrecvs; i++) {
5293       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5294       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5295     }
5296 
5297     /* pack the outgoing message j-array */
5298     k = 0;
5299     for (i=0; i<nsends; i++) {
5300       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5301       bufJ  = bufj+sstartsj[i];
5302       for (j=0; j<nrows; j++) {
5303         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5304         for (ll=0; ll<sbs; ll++) {
5305           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5306           for (l=0; l<ncols; l++) {
5307             *bufJ++ = cols[l];
5308           }
5309           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5310         }
5311       }
5312       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5313     }
5314 
5315     /* recvs and sends of j-array are completed */
5316     i = nrecvs;
5317     while (i--) {
5318       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5319     }
5320     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5321   } else if (scall == MAT_REUSE_MATRIX) {
5322     sstartsj = *startsj_s;
5323     rstartsj = *startsj_r;
5324     bufa     = *bufa_ptr;
5325     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5326     b_otha   = b_oth->a;
5327   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5328 
5329   /* a-array */
5330   /*---------*/
5331   /*  post receives of a-array */
5332   for (i=0; i<nrecvs; i++) {
5333     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5334     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5335   }
5336 
5337   /* pack the outgoing message a-array */
5338   k = 0;
5339   for (i=0; i<nsends; i++) {
5340     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5341     bufA  = bufa+sstartsj[i];
5342     for (j=0; j<nrows; j++) {
5343       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5344       for (ll=0; ll<sbs; ll++) {
5345         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5346         for (l=0; l<ncols; l++) {
5347           *bufA++ = vals[l];
5348         }
5349         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5350       }
5351     }
5352     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5353   }
5354   /* recvs and sends of a-array are completed */
5355   i = nrecvs;
5356   while (i--) {
5357     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5358   }
5359   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5360   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5361 
5362   if (scall == MAT_INITIAL_MATRIX) {
5363     /* put together the new matrix */
5364     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5365 
5366     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5367     /* Since these are PETSc arrays, change flags to free them as necessary. */
5368     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5369     b_oth->free_a  = PETSC_TRUE;
5370     b_oth->free_ij = PETSC_TRUE;
5371     b_oth->nonew   = 0;
5372 
5373     ierr = PetscFree(bufj);CHKERRQ(ierr);
5374     if (!startsj_s || !bufa_ptr) {
5375       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5376       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5377     } else {
5378       *startsj_s = sstartsj;
5379       *startsj_r = rstartsj;
5380       *bufa_ptr  = bufa;
5381     }
5382   }
5383   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5384   PetscFunctionReturn(0);
5385 }
5386 
5387 /*@C
5388   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5389 
5390   Not Collective
5391 
5392   Input Parameter:
5393 . A - The matrix in mpiaij format
5394 
5395   Output Parameters:
5396 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5397 . colmap - A map from global column index to local index into lvec
5398 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5399 
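  Example usage (a minimal sketch; colmap is declared as PetscTable when PETSC_USE_CTABLE is defined, and as PetscInt* otherwise):

     Vec        lvec;
     PetscInt   *colmap;   /* PetscTable colmap; when PETSC_USE_CTABLE is defined */
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     /* lvec, colmap, and Mvctx are references owned by the matrix; do not destroy them */
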
5400   Level: developer
5401 
5402 @*/
5403 #if defined(PETSC_USE_CTABLE)
5404 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5405 #else
5406 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5407 #endif
5408 {
5409   Mat_MPIAIJ *a;
5410 
5411   PetscFunctionBegin;
5412   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5413   PetscValidPointer(lvec, 2);
5414   PetscValidPointer(colmap, 3);
5415   PetscValidPointer(multScatter, 4);
5416   a = (Mat_MPIAIJ*) A->data;
5417   if (lvec) *lvec = a->lvec;
5418   if (colmap) *colmap = a->colmap;
5419   if (multScatter) *multScatter = a->Mvctx;
5420   PetscFunctionReturn(0);
5421 }
5422 
5423 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5425 #if defined(PETSC_HAVE_MKL_SPARSE)
5426 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5427 #endif
5428 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5429 #if defined(PETSC_HAVE_ELEMENTAL)
5430 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5431 #endif
5432 #if defined(PETSC_HAVE_HYPRE)
5433 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5434 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5435 #endif
5436 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5437 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5438 
5439 /*
5440     Computes (B'*A')' since computing A*B directly is untenable
5441 
5442                n                       p                          p
5443         (              )       (              )         (                  )
5444       m (      A       )  *  n (       B      )   =   m (         C        )
5445         (              )       (              )         (                  )
5446 
5447 */
5448 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5449 {
5450   PetscErrorCode ierr;
5451   Mat            At,Bt,Ct;
5452 
5453   PetscFunctionBegin;
5454   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5455   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5456   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5457   ierr = MatDestroy(&At);CHKERRQ(ierr);
5458   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5459   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5460   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5461   PetscFunctionReturn(0);
5462 }
5463 
5464 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5465 {
5466   PetscErrorCode ierr;
5467   PetscInt       m=A->rmap->n,n=B->cmap->n;
5468   Mat            Cmat;
5469 
5470   PetscFunctionBegin;
5471   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5472   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5473   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5474   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5475   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5476   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5477   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5478   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5479 
5480   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5481 
5482   *C = Cmat;
5483   PetscFunctionReturn(0);
5484 }
5485 
5486 /* ----------------------------------------------------------------*/
5487 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5488 {
5489   PetscErrorCode ierr;
5490 
5491   PetscFunctionBegin;
5492   if (scall == MAT_INITIAL_MATRIX) {
5493     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5494     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5495     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5496   }
5497   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5498   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5499   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5500   PetscFunctionReturn(0);
5501 }
5502 
5503 /*MC
5504    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5505 
5506    Options Database Keys:
5507 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5508 
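  Example (a minimal creation sketch; comm, M, and N are assumed to be defined by the caller):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     /* ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
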
5509   Level: beginner
5510 
5511 .seealso: MatCreateAIJ()
5512 M*/
5513 
5514 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5515 {
5516   Mat_MPIAIJ     *b;
5517   PetscErrorCode ierr;
5518   PetscMPIInt    size;
5519 
5520   PetscFunctionBegin;
5521   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5522 
5523   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5524   B->data       = (void*)b;
5525   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5526   B->assembled  = PETSC_FALSE;
5527   B->insertmode = NOT_SET_VALUES;
5528   b->size       = size;
5529 
5530   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5531 
5532   /* build cache for off array entries formed */
5533   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5534 
5535   b->donotstash  = PETSC_FALSE;
5536   b->colmap      = 0;
5537   b->garray      = 0;
5538   b->roworiented = PETSC_TRUE;
5539 
5540   /* stuff used for matrix vector multiply */
5541   b->lvec  = NULL;
5542   b->Mvctx = NULL;
5543 
5544   /* stuff for MatGetRow() */
5545   b->rowindices   = 0;
5546   b->rowvalues    = 0;
5547   b->getrowactive = PETSC_FALSE;
5548 
5549   /* flexible pointer used in CUSP/CUSPARSE classes */
5550   b->spptr = NULL;
5551 
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5559   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5561 #if defined(PETSC_HAVE_MKL_SPARSE)
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5563 #endif
5564   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5565   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5566 #if defined(PETSC_HAVE_ELEMENTAL)
5567   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5568 #endif
5569 #if defined(PETSC_HAVE_HYPRE)
5570   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5571 #endif
5572   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5573   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5574   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5575   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5576   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5577 #if defined(PETSC_HAVE_HYPRE)
5578   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5579 #endif
5580   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5581   PetscFunctionReturn(0);
5582 }
5583 
5584 /*@C
5585      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5586          and "off-diagonal" part of the matrix in CSR format.
5587 
5588    Collective on MPI_Comm
5589 
5590    Input Parameters:
5591 +  comm - MPI communicator
5592 .  m - number of local rows (Cannot be PETSC_DECIDE)
5593 .  n - This value should be the same as the local size used in creating the
5594        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5595        calculated if N is given). For square matrices n is almost always m.
5596 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5597 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5598 .   i - row indices for "diagonal" portion of matrix
5599 .   j - column indices
5600 .   a - matrix values
5601 .   oi - row indices for "off-diagonal" portion of matrix
5602 .   oj - column indices
5603 -   oa - matrix values
5604 
5605    Output Parameter:
5606 .   mat - the matrix
5607 
5608    Level: advanced
5609 
5610    Notes:
5611        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5612        must free the arrays once the matrix has been destroyed and not before.
5613 
5614        The i, j, oi, and oj indices are 0 based
5615 
5616        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5617 
5618        This sets local rows and cannot be used to set off-processor values.
5619 
5620        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5621        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5622        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5623        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5624        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5625        communication if it is known that only local entries will be set.
5626 
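       Example usage (a minimal sketch; m, n and the six CSR arrays are assumed to have been built by the caller, with i/j/a describing the "diagonal" block and oi/oj/oa the "off-diagonal" block):

          Mat A;
          ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
          /* ... use A; the arrays must remain valid until A is destroyed ... */
          ierr = MatDestroy(&A);CHKERRQ(ierr);
          /* only now may the caller free i, j, a, oi, oj, and oa */
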
5627 .keywords: matrix, aij, compressed row, sparse, parallel
5628 
5629 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5630           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5631 @*/
5632 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5633 {
5634   PetscErrorCode ierr;
5635   Mat_MPIAIJ     *maij;
5636 
5637   PetscFunctionBegin;
5638   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5639   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5640   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5641   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5642   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5643   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5644   maij = (Mat_MPIAIJ*) (*mat)->data;
5645 
5646   (*mat)->preallocated = PETSC_TRUE;
5647 
5648   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5649   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5650 
5651   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5652   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5653 
5654   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5655   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5656   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5657   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5658 
5659   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5660   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5661   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5662   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5663   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5664   PetscFunctionReturn(0);
5665 }
5666 
5667 /*
5668     Special version for direct calls from Fortran
5669 */
5670 #include <petsc/private/fortranimpl.h>
5671 
5672 /* Change these macros so they can be used in a void function */
5673 #undef CHKERRQ
5674 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5675 #undef SETERRQ2
5676 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5677 #undef SETERRQ3
5678 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5679 #undef SETERRQ
5680 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5681 
5682 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5683 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5684 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5685 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5686 #else
5687 #endif
5688 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5689 {
5690   Mat            mat  = *mmat;
5691   PetscInt       m    = *mm, n = *mn;
5692   InsertMode     addv = *maddv;
5693   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5694   PetscScalar    value;
5695   PetscErrorCode ierr;
5696 
5697   MatCheckPreallocated(mat,1);
5698   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5699 
5700 #if defined(PETSC_USE_DEBUG)
5701   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5702 #endif
5703   {
5704     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5705     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5706     PetscBool roworiented = aij->roworiented;
5707 
5708     /* Some variables required by the macros below */
5709     Mat        A                 = aij->A;
5710     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5711     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5712     MatScalar  *aa               = a->a;
5713     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5714     Mat        B                 = aij->B;
5715     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5716     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5717     MatScalar  *ba               = b->a;
5718 
5719     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5720     PetscInt  nonew = a->nonew;
5721     MatScalar *ap1,*ap2;
5722 
5723     PetscFunctionBegin;
5724     for (i=0; i<m; i++) {
5725       if (im[i] < 0) continue;
5726 #if defined(PETSC_USE_DEBUG)
5727       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5728 #endif
5729       if (im[i] >= rstart && im[i] < rend) {
5730         row      = im[i] - rstart;
5731         lastcol1 = -1;
5732         rp1      = aj + ai[row];
5733         ap1      = aa + ai[row];
5734         rmax1    = aimax[row];
5735         nrow1    = ailen[row];
5736         low1     = 0;
5737         high1    = nrow1;
5738         lastcol2 = -1;
5739         rp2      = bj + bi[row];
5740         ap2      = ba + bi[row];
5741         rmax2    = bimax[row];
5742         nrow2    = bilen[row];
5743         low2     = 0;
5744         high2    = nrow2;
5745 
5746         for (j=0; j<n; j++) {
5747           if (roworiented) value = v[i*n+j];
5748           else value = v[i+j*m];
5749           if (in[j] >= cstart && in[j] < cend) {
5750             col = in[j] - cstart;
5751             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5752             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5753           } else if (in[j] < 0) continue;
5754 #if defined(PETSC_USE_DEBUG)
5755           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5756           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5757 #endif
5758           else {
5759             if (mat->was_assembled) {
5760               if (!aij->colmap) {
5761                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5762               }
5763 #if defined(PETSC_USE_CTABLE)
5764               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5765               col--;
5766 #else
5767               col = aij->colmap[in[j]] - 1;
5768 #endif
5769               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5770               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5771                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5772                 col  =  in[j];
5773                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5774                 B     = aij->B;
5775                 b     = (Mat_SeqAIJ*)B->data;
5776                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5777                 rp2   = bj + bi[row];
5778                 ap2   = ba + bi[row];
5779                 rmax2 = bimax[row];
5780                 nrow2 = bilen[row];
5781                 low2  = 0;
5782                 high2 = nrow2;
5783                 bm    = aij->B->rmap->n;
5784                 ba    = b->a;
5785               }
5786             } else col = in[j];
5787             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5788           }
5789         }
5790       } else if (!aij->donotstash) {
5791         if (roworiented) {
5792           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5793         } else {
5794           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5795         }
5796       }
5797     }
5798   }
5799   PetscFunctionReturnVoid();
5800 }
5801 
5802