xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a8f87f1dd689f82917d46bbc2fdc0da7b40626d1)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also automatically
23    switches over to use inodes when enough of them exist.
24 
25   Level: beginner
26 
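   Example usage, a minimal sketch with error checking omitted (M, N and the
   preallocation counts are illustrative placeholders):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);        /* takes effect on one process */
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL); /* takes effect on multiple processes */
.ve
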
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
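   Example usage, a minimal sketch that selects the type from the options
   database (run with -mat_type aijcrl; sizes are illustrative placeholders):
.vb
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetFromOptions(A);
.ve
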
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
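/*
   Builds an index set of the locally owned rows that contain at least one
   numerically nonzero entry, scanning both the diagonal (A) and off-diagonal (B)
   blocks; *keptrows is returned as NULL when no process has an all-zero row.
*/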
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = NULL;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
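/*
   When Y is assembled and its row and column layouts are congruent, the entire
   diagonal lies in the local diagonal block, so D can be set there directly;
   otherwise this falls back to the generic MatDiagonalSet_Default().
*/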
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
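/*
   Each process accumulates its contributions into a work array of global length
   N (diagonal-block columns offset by cmap->rstart, off-diagonal columns mapped
   through garray), then the work arrays are combined with a sum (or max, for
   NORM_INFINITY) reduction; note the O(N) per-process work storage.
*/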
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
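
    A sketch of the intended call pattern (gmat is significant only on process 0,
    m is the number of rows this process is to own):

      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&mat);
      ...
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&mat);    (same pattern, new values)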
238 */
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the number of diagonal and off-diagonal nonzeros in each row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the number of diagonal and off-diagonal nonzeros in each row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it it is not scalable (each processor
406 has an order N integer array) but is fast to access.
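
  For example, if garray = {3,7,9} (the global columns that appear in the
  off-diagonal block B), then without PETSC_USE_CTABLE colmap[3] = 1,
  colmap[7] = 2, colmap[9] = 3, and colmap is zero elsewhere; lookups subtract
  one, so a result of -1 flags a global column with no entry in B.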
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
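/*
   Inserts (or, for ADD_VALUES, adds) a single value at local (row,col) of the
   diagonal block A. The search window [low1,high1) is first narrowed by a short
   binary search -- lastcol1 lets consecutive increasing column accesses reuse the
   previous window -- then finished with a linear scan. A missing entry is created
   in place, honoring the nonew and ignorezeroentries policies and reallocating
   the row via MatSeqXAIJReallocateAIJ() when it is full.
   MatSetValues_SeqAIJ_B_Private() below is the analogous insertion into the
   off-diagonal block B.
*/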
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
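  /* v is assumed to hold the entire local row in ascending global-column order:
     the part of B left of the diagonal block, then all of A's part of the row,
     then the remainder of B */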
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
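/*
   Entries in locally owned rows are routed directly into the diagonal block A
   (global columns in [cstart,cend)) or the off-diagonal block B (all other
   columns, translated to local indices through colmap/garray); entries destined
   for rows owned by other processes are stashed and communicated during assembly.
*/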
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
636 {
637   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
638   PetscErrorCode ierr;
639   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
640   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
641 
642   PetscFunctionBegin;
643   for (i=0; i<m; i++) {
644     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
645     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
646     if (idxm[i] >= rstart && idxm[i] < rend) {
647       row = idxm[i] - rstart;
648       for (j=0; j<n; j++) {
649         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
650         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
651         if (idxn[j] >= cstart && idxn[j] < cend) {
652           col  = idxn[j] - cstart;
653           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
654         } else {
655           if (!aij->colmap) {
656             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
657           }
658 #if defined(PETSC_USE_CTABLE)
659           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
660           col--;
661 #else
662           col = aij->colmap[idxn[j]] - 1;
663 #endif
664           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
665           else {
666             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
667           }
668         }
669       }
670     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
676 
677 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
678 {
679   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
680   PetscErrorCode ierr;
681   PetscInt       nstash,reallocs;
682 
683   PetscFunctionBegin;
684   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
685 
686   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
687   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
688   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 
692 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
693 {
694   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
695   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
696   PetscErrorCode ierr;
697   PetscMPIInt    n;
698   PetscInt       i,j,rstart,ncols,flg;
699   PetscInt       *row,*col;
700   PetscBool      other_disassembled;
701   PetscScalar    *val;
702 
703   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
704 
705   PetscFunctionBegin;
706   if (!aij->donotstash && !mat->nooffprocentries) {
707     while (1) {
708       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
709       if (!flg) break;
710 
711       for (i=0; i<n; ) {
712         /* Now identify the consecutive vals belonging to the same row */
713         for (j=i,rstart=row[j]; j<n; j++) {
714           if (row[j] != rstart) break;
715         }
716         if (j < n) ncols = j-i;
717         else       ncols = n-i;
718         /* Now assemble all these values with a single function call */
719         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
720 
721         i = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
725   }
726   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
727   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
728 
729   /* determine if any processor has disassembled, if so we must
730      also disassemble ourselves, in order that we may reassemble. */
731   /*
732      if nonzero structure of submatrix B cannot change then we know that
733      no processor disassembled thus we can skip this stuff
734   */
735   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
736     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
737     if (mat->was_assembled && !other_disassembled) {
738       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
739     }
740   }
741   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
742     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
743   }
744   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
745   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
746   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
747 
748   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
749 
750   aij->rowvalues = 0;
751 
752   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
753   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
754 
755   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
756   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
757     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
758     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
764 {
765   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
770   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
771   PetscFunctionReturn(0);
772 }
773 
774 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
775 {
776   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
777   PetscInt      *lrows;
778   PetscInt       r, len;
779   PetscBool      cong;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   /* get locally owned rows */
784   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
792     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
793     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
794     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
795   }
796   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
797   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
798   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
799   if ((diag != 0.0) && cong) {
800     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
801   } else if (diag != 0.0) {
802     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
803     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
804     for (r = 0; r < len; ++r) {
805       const PetscInt row = lrows[r] + A->rmap->rstart;
806       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
807     }
808     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
809     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
810   } else {
811     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   }
813   ierr = PetscFree(lrows);CHKERRQ(ierr);
814 
815   /* only change matrix nonzero state if pattern was allowed to be changed */
816   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
817     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
818     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
824 {
825   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
826   PetscErrorCode    ierr;
827   PetscMPIInt       n = A->rmap->n;
828   PetscInt          i,j,r,m,p = 0,len = 0;
829   PetscInt          *lrows,*owners = A->rmap->range;
830   PetscSFNode       *rrows;
831   PetscSF           sf;
832   const PetscScalar *xx;
833   PetscScalar       *bb,*mask;
834   Vec               xmask,lmask;
835   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
836   const PetscInt    *aj, *ii,*ridx;
837   PetscScalar       *aa;
838 
839   PetscFunctionBegin;
840   /* Create SF where leaves are input rows and roots are owned rows */
841   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
842   for (r = 0; r < n; ++r) lrows[r] = -1;
843   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
844   for (r = 0; r < N; ++r) {
845     const PetscInt idx   = rows[r];
846     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
847     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
848       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
849     }
850     rrows[r].rank  = p;
851     rrows[r].index = rows[r] - owners[p];
852   }
853   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
854   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
855   /* Collect flags for rows to be zeroed */
856   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
857   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
858   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
859   /* Compress and put in row numbers */
860   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
861   /* zero diagonal part of matrix */
862   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
863   /* handle off diagonal part of matrix */
864   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
865   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
866   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
867   for (i=0; i<len; i++) bb[lrows[i]] = 1;
868   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
869   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
870   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
872   if (x) {
873     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
876     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
877   }
878   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
879   /* remove zeroed rows of off diagonal matrix */
880   ii = aij->i;
881   for (i=0; i<len; i++) {
882     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
883   }
884   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
885   if (aij->compressedrow.use) {
886     m    = aij->compressedrow.nrows;
887     ii   = aij->compressedrow.i;
888     ridx = aij->compressedrow.rindex;
889     for (i=0; i<m; i++) {
890       n  = ii[i+1] - ii[i];
891       aj = aij->j + ii[i];
892       aa = aij->a + ii[i];
893 
894       for (j=0; j<n; j++) {
895         if (PetscAbsScalar(mask[*aj])) {
896           if (b) bb[*ridx] -= *aa*xx[*aj];
897           *aa = 0.0;
898         }
899         aa++;
900         aj++;
901       }
902       ridx++;
903     }
904   } else { /* do not use compressed row format */
905     m = l->B->rmap->n;
906     for (i=0; i<m; i++) {
907       n  = ii[i+1] - ii[i];
908       aj = aij->j + ii[i];
909       aa = aij->a + ii[i];
910       for (j=0; j<n; j++) {
911         if (PetscAbsScalar(mask[*aj])) {
912           if (b) bb[i] -= *aa*xx[*aj];
913           *aa = 0.0;
914         }
915         aa++;
916         aj++;
917       }
918     }
919   }
920   if (x) {
921     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
922     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
923   }
924   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
925   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
926   ierr = PetscFree(lrows);CHKERRQ(ierr);
927 
928   /* only change matrix nonzero state if pattern was allowed to be changed */
929   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
930     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
931     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
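/*
   Computes yy = A*xx, overlapping communication with computation: begin
   scattering the needed off-process entries of xx into lvec, apply the local
   diagonal block while the scatter is in flight, then complete the scatter and
   add the off-diagonal contribution B*lvec.
*/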
936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
937 {
938   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
939   PetscErrorCode ierr;
940   PetscInt       nt;
941   VecScatter     Mvctx = a->Mvctx;
942 
943   PetscFunctionBegin;
944   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
945   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
946 
947   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
949   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
955 {
956   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
967   PetscErrorCode ierr;
968   VecScatter     Mvctx = a->Mvctx;
969 
970   PetscFunctionBegin;
971   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
972   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
974   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
980 {
981   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
982   PetscErrorCode ierr;
983   PetscBool      merged;
984 
985   PetscFunctionBegin;
986   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
987   /* do nondiagonal part */
988   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
989   if (!merged) {
990     /* send it on its way */
991     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
992     /* do local part */
993     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
994     /* receive remote parts: note this assumes the values are not actually */
995     /* added in yy until the next line */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   } else {
998     /* do local part */
999     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1000     /* send it on its way */
1001     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1002     /* the values were actually received in the Begin(), but we still need to call this no-op End() */
1003     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004   }
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1009 {
1010   MPI_Comm       comm;
1011   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1012   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1013   IS             Me,Notme;
1014   PetscErrorCode ierr;
1015   PetscInt       M,N,first,last,*notme,i;
1016   PetscMPIInt    size;
1017 
1018   PetscFunctionBegin;
1019   /* Easy test: symmetric diagonal block */
1020   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1021   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1022   if (!*f) PetscFunctionReturn(0);
1023   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1024   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1025   if (size == 1) PetscFunctionReturn(0);
1026 
1027   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1028   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1029   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1030   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1031   for (i=0; i<first; i++) notme[i] = i;
1032   for (i=last; i<M; i++) notme[i-last+first] = i;
1033   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1034   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1035   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1036   Aoff = Aoffs[0];
1037   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1038   Boff = Boffs[0];
1039   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1040   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1041   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1042   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1043   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1044   ierr = PetscFree(notme);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1049 {
1050   PetscErrorCode ierr;
1051 
1052   PetscFunctionBegin;
1053   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1058 {
1059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1060   PetscErrorCode ierr;
1061 
1062   PetscFunctionBegin;
1063   /* do nondiagonal part */
1064   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1065   /* send it on its way */
1066   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1067   /* do local part */
1068   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1069   /* receive remote parts */
1070   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 /*
1075   This only works correctly for square matrices where the subblock A->A is the
1076    diagonal block
1077 */
1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1079 {
1080   PetscErrorCode ierr;
1081   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1082 
1083   PetscFunctionBegin;
1084   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1085   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1086   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1091 {
1092   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1097   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1102 {
1103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107 #if defined(PETSC_USE_LOG)
1108   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1109 #endif
1110   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1111   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1112   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1113   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1114 #if defined(PETSC_USE_CTABLE)
1115   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1116 #else
1117   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1118 #endif
1119   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1120   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1121   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1122   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1123   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1124   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1125   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1126 
1127   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1136 #if defined(PETSC_HAVE_ELEMENTAL)
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1138 #endif
1139 #if defined(PETSC_HAVE_HYPRE)
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1142 #endif
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1149 {
1150   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1151   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1152   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1153   PetscErrorCode ierr;
1154   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1155   int            fd;
1156   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1157   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1158   PetscScalar    *column_values;
1159   PetscInt       message_count,flowcontrolcount;
1160   FILE           *file;
1161 
1162   PetscFunctionBegin;
1163   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1164   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1165   nz   = A->nz + B->nz;
1166   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1167   if (!rank) {
1168     header[0] = MAT_FILE_CLASSID;
1169     header[1] = mat->rmap->N;
1170     header[2] = mat->cmap->N;
1171 
1172     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1173     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     /* get largest number of rows any processor has */
1175     rlen  = mat->rmap->n;
1176     range = mat->rmap->range;
1177     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1178   } else {
1179     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1180     rlen = mat->rmap->n;
1181   }
1182 
1183   /* load up the local row counts */
1184   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1185   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1186 
1187   /* store the row lengths to the file */
1188   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1189   if (!rank) {
1190     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1191     for (i=1; i<size; i++) {
1192       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1193       rlen = range[i+1] - range[i];
1194       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1196     }
1197     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1198   } else {
1199     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1200     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1201     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1202   }
1203   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1204 
1205   /* load up the local column indices */
1206   nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
1207   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1208   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1209   cnt   = 0;
1210   for (i=0; i<mat->rmap->n; i++) {
1211     for (j=B->i[i]; j<B->i[i+1]; j++) {
1212       if ((col = garray[B->j[j]]) > cstart) break;
1213       column_indices[cnt++] = col;
1214     }
1215     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1216     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1217   }
1218   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1219 
1220   /* store the column indices to the file */
1221   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1222   if (!rank) {
1223     MPI_Status status;
1224     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     for (i=1; i<size; i++) {
1226       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1227       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1228       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1229       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1231     }
1232     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1233   } else {
1234     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1235     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1237     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1238   }
1239   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1240 
1241   /* load up the local column values */
1242   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1243   cnt  = 0;
1244   for (i=0; i<mat->rmap->n; i++) {
1245     for (j=B->i[i]; j<B->i[i+1]; j++) {
1246       if (garray[B->j[j]] > cstart) break;
1247       column_values[cnt++] = B->a[j];
1248     }
1249     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1250     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1251   }
1252   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1253 
1254   /* store the column values to the file */
1255   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1256   if (!rank) {
1257     MPI_Status status;
1258     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1259     for (i=1; i<size; i++) {
1260       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1261       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1262       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1263       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1264       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1265     }
1266     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1267   } else {
1268     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1269     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1271     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1272   }
1273   ierr = PetscFree(column_values);CHKERRQ(ierr);
1274 
1275   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1276   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #include <petscdraw.h>
1281 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1282 {
1283   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1284   PetscErrorCode    ierr;
1285   PetscMPIInt       rank = aij->rank,size = aij->size;
1286   PetscBool         isdraw,iascii,isbinary;
1287   PetscViewer       sviewer;
1288   PetscViewerFormat format;
1289 
1290   PetscFunctionBegin;
1291   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1292   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1294   if (iascii) {
1295     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1296     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1297       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1298       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1299       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1300       for (i=0; i<(PetscInt)size; i++) {
1301         nmax = PetscMax(nmax,nz[i]);
1302         nmin = PetscMin(nmin,nz[i]);
1303         navg += nz[i];
1304       }
1305       ierr = PetscFree(nz);CHKERRQ(ierr);
1306       navg = navg/size;
1307       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1308       PetscFunctionReturn(0);
1309     }
1310     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1311     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1312       MatInfo   info;
1313       PetscBool inodes;
1314 
1315       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1316       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1317       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1318       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1319       if (!inodes) {
1320         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1321                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1322       } else {
1323         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1324                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1325       }
1326       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1328       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1330       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1332       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1333       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1334       PetscFunctionReturn(0);
1335     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1336       PetscInt inodecount,inodelimit,*inodes;
1337       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1338       if (inodes) {
1339         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1340       } else {
1341         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1342       }
1343       PetscFunctionReturn(0);
1344     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1345       PetscFunctionReturn(0);
1346     }
1347   } else if (isbinary) {
1348     if (size == 1) {
1349       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1350       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1351     } else {
1352       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1353     }
1354     PetscFunctionReturn(0);
1355   } else if (isdraw) {
1356     PetscDraw draw;
1357     PetscBool isnull;
1358     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1359     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1360     if (isnull) PetscFunctionReturn(0);
1361   }
1362 
1363   {
1364     /* assemble the entire matrix onto rank 0; this is not memory scalable and is intended only for viewing */
1365     Mat        A;
1366     Mat_SeqAIJ *Aloc;
1367     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1368     MatScalar  *a;
1369 
1370     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1371     if (!rank) {
1372       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1373     } else {
1374       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1375     }
1376     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1377     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1378     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1379     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1380     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1381 
1382     /* copy over the A part */
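         /* temporarily shift the stored local column indices to global ones so
            each row can be handed to MatSetValues() directly; the indices are
            shifted back below */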
1383     Aloc = (Mat_SeqAIJ*)aij->A->data;
1384     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1385     row  = mat->rmap->rstart;
1386     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1387     for (i=0; i<m; i++) {
1388       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1389       row++;
1390       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1391     }
1392     aj = Aloc->j;
1393     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1394 
1395     /* copy over the B part */
1396     Aloc = (Mat_SeqAIJ*)aij->B->data;
1397     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1398     row  = mat->rmap->rstart;
1399     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1400     ct   = cols;
1401     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1402     for (i=0; i<m; i++) {
1403       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1404       row++;
1405       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1406     }
1407     ierr = PetscFree(ct);CHKERRQ(ierr);
1408     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1409     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410     /*
1411        Everyone has to call to draw the matrix since the graphics waits are
1412        synchronized across all processors that share the PetscDraw object
1413     */
1414     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1415     if (!rank) {
1416       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1417       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1420     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1421     ierr = MatDestroy(&A);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1427 {
1428   PetscErrorCode ierr;
1429   PetscBool      iascii,isdraw,issocket,isbinary;
1430 
1431   PetscFunctionBegin;
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1436   if (iascii || isdraw || isbinary || issocket) {
1437     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1438   }
1439   PetscFunctionReturn(0);
1440 }
1441 
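     /*
        The "local" SOR variants below sweep only on the diagonal block mat->A of
        each process; the coupling to off-process unknowns enters through the
        updated right-hand side bb1 = bb - B*x, where B is the off-diagonal block
        and x is the ghosted iterate gathered with mat->Mvctx.
     */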
1442 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1443 {
1444   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1445   PetscErrorCode ierr;
1446   Vec            bb1 = 0;
1447   PetscBool      hasop;
1448 
1449   PetscFunctionBegin;
1450   if (flag == SOR_APPLY_UPPER) {
1451     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1452     PetscFunctionReturn(0);
1453   }
1454 
1455   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1456     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1457   }
1458 
1459   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1460     if (flag & SOR_ZERO_INITIAL_GUESS) {
1461       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1462       its--;
1463     }
1464 
1465     while (its--) {
1466       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1468 
1469       /* update rhs: bb1 = bb - B*x */
1470       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1471       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1472 
1473       /* local sweep */
1474       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1475     }
1476   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1477     if (flag & SOR_ZERO_INITIAL_GUESS) {
1478       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1479       its--;
1480     }
1481     while (its--) {
1482       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1484 
1485       /* update rhs: bb1 = bb - B*x */
1486       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1487       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1488 
1489       /* local sweep */
1490       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1491     }
1492   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1493     if (flag & SOR_ZERO_INITIAL_GUESS) {
1494       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1495       its--;
1496     }
1497     while (its--) {
1498       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500 
1501       /* update rhs: bb1 = bb - B*x */
1502       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1503       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1504 
1505       /* local sweep */
1506       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1507     }
1508   } else if (flag & SOR_EISENSTAT) {
1509     Vec xx1;
1510 
1511     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1512     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1513 
1514     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1516     if (!mat->diag) {
1517       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1518       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1519     }
1520     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1521     if (hasop) {
1522       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1523     } else {
1524       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1525     }
1526     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1527 
1528     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1529 
1530     /* local sweep */
1531     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1532     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1533     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1534   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1535 
1536   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1537 
1538   matin->factorerrortype = mat->A->factorerrortype;
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1543 {
1544   Mat            aA,aB,Aperm;
1545   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1546   PetscScalar    *aa,*ba;
1547   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1548   PetscSF        rowsf,sf;
1549   IS             parcolp = NULL;
1550   PetscBool      done;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1555   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1556   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1557   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1558 
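       /* The permutations are supplied as "pull" indices (for each destination
          row/column, the source it draws from); the PetscSF reductions below
          invert them in parallel, leaving in rdest[i] and cdest[i] the
          destination of local row i and column i, respectively */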
1559   /* Invert row permutation to find out where my rows should go */
1560   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1561   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1562   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1563   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1564   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1565   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1566 
1567   /* Invert column permutation to find out where my columns should go */
1568   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1569   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1570   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1571   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1572   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1574   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1575 
1576   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1577   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1578   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1579 
1580   /* Find out where my gcols should go */
1581   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1582   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1583   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1584   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1585   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1586   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1588   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1589 
1590   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1591   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1592   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1593   for (i=0; i<m; i++) {
1594     PetscInt row = rdest[i],rowner;
1595     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1596     for (j=ai[i]; j<ai[i+1]; j++) {
1597       PetscInt cowner,col = cdest[aj[j]];
1598       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1599       if (rowner == cowner) dnnz[i]++;
1600       else onnz[i]++;
1601     }
1602     for (j=bi[i]; j<bi[i+1]; j++) {
1603       PetscInt cowner,col = gcdest[bj[j]];
1604       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1605       if (rowner == cowner) dnnz[i]++;
1606       else onnz[i]++;
1607     }
1608   }
1609   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1610   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1612   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1613   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1614 
1615   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1616   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1617   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1618   for (i=0; i<m; i++) {
1619     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1620     PetscInt j0,rowlen;
1621     rowlen = ai[i+1] - ai[i];
1622     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the reused acols/bcols buffers, so insert in batches of at most m */
1623       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1624       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1625     }
1626     rowlen = bi[i+1] - bi[i];
1627     for (j0=j=0; j<rowlen; j0=j) {
1628       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1629       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1630     }
1631   }
1632   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1633   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1634   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1635   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1636   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1637   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1638   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1639   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1640   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1641   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1642   *B = Aperm;
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1647 {
1648   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1649   PetscErrorCode ierr;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1653   if (ghosts) *ghosts = aij->garray;
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1658 {
1659   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1660   Mat            A    = mat->A,B = mat->B;
1661   PetscErrorCode ierr;
1662   PetscReal      isend[5],irecv[5];
1663 
1664   PetscFunctionBegin;
1665   info->block_size = 1.0;
1666   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1667 
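       /* accumulate the five statistics of the diagonal and off-diagonal blocks
          in one buffer so that a single MPI reduction handles them all */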
1668   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1669   isend[3] = info->memory;  isend[4] = info->mallocs;
1670 
1671   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1672 
1673   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1674   isend[3] += info->memory;  isend[4] += info->mallocs;
1675   if (flag == MAT_LOCAL) {
1676     info->nz_used      = isend[0];
1677     info->nz_allocated = isend[1];
1678     info->nz_unneeded  = isend[2];
1679     info->memory       = isend[3];
1680     info->mallocs      = isend[4];
1681   } else if (flag == MAT_GLOBAL_MAX) {
1682     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1683 
1684     info->nz_used      = irecv[0];
1685     info->nz_allocated = irecv[1];
1686     info->nz_unneeded  = irecv[2];
1687     info->memory       = irecv[3];
1688     info->mallocs      = irecv[4];
1689   } else if (flag == MAT_GLOBAL_SUM) {
1690     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1691 
1692     info->nz_used      = irecv[0];
1693     info->nz_allocated = irecv[1];
1694     info->nz_unneeded  = irecv[2];
1695     info->memory       = irecv[3];
1696     info->mallocs      = irecv[4];
1697   }
1698   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1699   info->fill_ratio_needed = 0;
1700   info->factor_mallocs    = 0;
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1705 {
1706   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   switch (op) {
1711   case MAT_NEW_NONZERO_LOCATIONS:
1712   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1713   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1714   case MAT_KEEP_NONZERO_PATTERN:
1715   case MAT_NEW_NONZERO_LOCATION_ERR:
1716   case MAT_USE_INODES:
1717   case MAT_IGNORE_ZERO_ENTRIES:
1718     MatCheckPreallocated(A,1);
1719     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1720     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1721     break;
1722   case MAT_ROW_ORIENTED:
1723     MatCheckPreallocated(A,1);
1724     a->roworiented = flg;
1725 
1726     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1727     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_NEW_DIAGONALS:
1730     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1731     break;
1732   case MAT_IGNORE_OFF_PROC_ENTRIES:
1733     a->donotstash = flg;
1734     break;
1735   case MAT_SPD:
1736     A->spd_set = PETSC_TRUE;
1737     A->spd     = flg;
1738     if (flg) {
1739       A->symmetric                  = PETSC_TRUE;
1740       A->structurally_symmetric     = PETSC_TRUE;
1741       A->symmetric_set              = PETSC_TRUE;
1742       A->structurally_symmetric_set = PETSC_TRUE;
1743     }
1744     break;
1745   case MAT_SYMMETRIC:
1746     MatCheckPreallocated(A,1);
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     break;
1749   case MAT_STRUCTURALLY_SYMMETRIC:
1750     MatCheckPreallocated(A,1);
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     break;
1753   case MAT_HERMITIAN:
1754     MatCheckPreallocated(A,1);
1755     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1756     break;
1757   case MAT_SYMMETRY_ETERNAL:
1758     MatCheckPreallocated(A,1);
1759     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1760     break;
1761   case MAT_SUBMAT_SINGLEIS:
1762     A->submat_singleis = flg;
1763     break;
1764   case MAT_STRUCTURE_ONLY:
1765     /* The option is handled directly by MatSetOption() */
1766     break;
1767   default:
1768     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1769   }
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1774 {
1775   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1776   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1777   PetscErrorCode ierr;
1778   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1779   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1780   PetscInt       *cmap,*idx_p;
1781 
1782   PetscFunctionBegin;
1783   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1784   mat->getrowactive = PETSC_TRUE;
1785 
1786   if (!mat->rowvalues && (idx || v)) {
1787     /*
1788         allocate enough space to hold information from the longest row.
1789     */
1790     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1791     PetscInt   max = 1,tmp;
1792     for (i=0; i<matin->rmap->n; i++) {
1793       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1794       if (max < tmp) max = tmp;
1795     }
1796     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1797   }
1798 
1799   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1800   lrow = row - rstart;
1801 
1802   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1803   if (!v)   {pvA = 0; pvB = 0;}
1804   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1805   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1806   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1807   nztot = nzA + nzB;
1808 
1809   cmap = mat->garray;
1810   if (v  || idx) {
1811     if (nztot) {
1812       /* Sort by increasing column numbers, assuming A and B already sorted */
1813       PetscInt imark = -1;
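           /* imark counts the off-diagonal (B) entries whose global column
              precedes the diagonal block; the output order is B-left, A, B-right */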
1814       if (v) {
1815         *v = v_p = mat->rowvalues;
1816         for (i=0; i<nzB; i++) {
1817           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1818           else break;
1819         }
1820         imark = i;
1821         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1822         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1823       }
1824       if (idx) {
1825         *idx = idx_p = mat->rowindices;
1826         if (imark > -1) {
1827           for (i=0; i<imark; i++) {
1828             idx_p[i] = cmap[cworkB[i]];
1829           }
1830         } else {
1831           for (i=0; i<nzB; i++) {
1832             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1833             else break;
1834           }
1835           imark = i;
1836         }
1837         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1838         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1839       }
1840     } else {
1841       if (idx) *idx = 0;
1842       if (v)   *v   = 0;
1843     }
1844   }
1845   *nz  = nztot;
1846   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1847   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1852 {
1853   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1854 
1855   PetscFunctionBegin;
1856   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1857   aij->getrowactive = PETSC_FALSE;
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1862 {
1863   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1864   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1865   PetscErrorCode ierr;
1866   PetscInt       i,j,cstart = mat->cmap->rstart;
1867   PetscReal      sum = 0.0;
1868   MatScalar      *v;
1869 
1870   PetscFunctionBegin;
1871   if (aij->size == 1) {
1872     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1873   } else {
1874     if (type == NORM_FROBENIUS) {
1875       v = amat->a;
1876       for (i=0; i<amat->nz; i++) {
1877         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1878       }
1879       v = bmat->a;
1880       for (i=0; i<bmat->nz; i++) {
1881         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1882       }
1883       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1884       *norm = PetscSqrtReal(*norm);
1885       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1886     } else if (type == NORM_1) { /* max column norm */
1887       PetscReal *tmp,*tmp2;
1888       PetscInt  *jj,*garray = aij->garray;
1889       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1890       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1891       *norm = 0.0;
1892       v     = amat->a; jj = amat->j;
1893       for (j=0; j<amat->nz; j++) {
1894         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1895       }
1896       v = bmat->a; jj = bmat->j;
1897       for (j=0; j<bmat->nz; j++) {
1898         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1899       }
1900       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1901       for (j=0; j<mat->cmap->N; j++) {
1902         if (tmp2[j] > *norm) *norm = tmp2[j];
1903       }
1904       ierr = PetscFree(tmp);CHKERRQ(ierr);
1905       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1906       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1907     } else if (type == NORM_INFINITY) { /* max row norm */
1908       PetscReal ntemp = 0.0;
1909       for (j=0; j<aij->A->rmap->n; j++) {
1910         v   = amat->a + amat->i[j];
1911         sum = 0.0;
1912         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1913           sum += PetscAbsScalar(*v); v++;
1914         }
1915         v = bmat->a + bmat->i[j];
1916         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1917           sum += PetscAbsScalar(*v); v++;
1918         }
1919         if (sum > ntemp) ntemp = sum;
1920       }
1921       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1922       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1923     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1929 {
1930   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1931   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1932   PetscErrorCode ierr;
1933   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1934   PetscInt       cstart = A->cmap->rstart,ncol;
1935   Mat            B;
1936   MatScalar      *array;
1937 
1938   PetscFunctionBegin;
1939   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1940   ai = Aloc->i; aj = Aloc->j;
1941   bi = Bloc->i; bj = Bloc->j;
1942   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1943     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1944     PetscSFNode          *oloc;
1945     PETSC_UNUSED PetscSF sf;
1946 
1947     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1948     /* compute d_nnz for preallocation */
1949     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1950     for (i=0; i<ai[ma]; i++) {
1951       d_nnz[aj[i]]++;
1952       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1953     }
1954     /* compute local off-diagonal contributions */
1955     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1956     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1957     /* reduce the per-ghost-column counts onto the owning processes to obtain o_nnz */
1958     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1959     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1960     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1961     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1962     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1963     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1964     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1965 
1966     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1967     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1968     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1969     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1970     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1971     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1972   } else {
1973     B    = *matout;
1974     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1975     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1976   }
1977 
1978   /* copy over the A part */
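       /* each local row of A becomes a column of B: note the swapped row/column
          arguments to MatSetValues() below (ncol row indices, one column index) */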
1979   array = Aloc->a;
1980   row   = A->rmap->rstart;
1981   for (i=0; i<ma; i++) {
1982     ncol = ai[i+1]-ai[i];
1983     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1984     row++;
1985     array += ncol; aj += ncol;
1986   }
1987   aj = Aloc->j;
1988   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1989 
1990   /* copy over the B part */
1991   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1992   array = Bloc->a;
1993   row   = A->rmap->rstart;
1994   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1995   cols_tmp = cols;
1996   for (i=0; i<mb; i++) {
1997     ncol = bi[i+1]-bi[i];
1998     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1999     row++;
2000     array += ncol; cols_tmp += ncol;
2001   }
2002   ierr = PetscFree(cols);CHKERRQ(ierr);
2003 
2004   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2005   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2006   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2007     *matout = B;
2008   } else {
2009     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2010   }
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2015 {
2016   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2017   Mat            a    = aij->A,b = aij->B;
2018   PetscErrorCode ierr;
2019   PetscInt       s1,s2,s3;
2020 
2021   PetscFunctionBegin;
2022   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2023   if (rr) {
2024     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2025     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2026     /* Overlap communication with computation. */
2027     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2028   }
2029   if (ll) {
2030     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2031     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2032     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2033   }
2034   /* scale the diagonal block */
2035   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2036 
2037   if (rr) {
2038     /* Do a scatter end and then right scale the off-diagonal block */
2039     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2040     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2041   }
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2046 {
2047   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2048   PetscErrorCode ierr;
2049 
2050   PetscFunctionBegin;
2051   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2056 {
2057   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2058   Mat            a,b,c,d;
2059   PetscBool      flg;
2060   PetscErrorCode ierr;
2061 
2062   PetscFunctionBegin;
2063   a = matA->A; b = matA->B;
2064   c = matB->A; d = matB->B;
2065 
2066   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2067   if (flg) {
2068     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2069   }
2070   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2075 {
2076   PetscErrorCode ierr;
2077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2078   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2079 
2080   PetscFunctionBegin;
2081   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2082   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2083     /* because of the column compression in the off-process part of the matrix a->B,
2084        the number of columns in a->B and b->B may differ, hence we cannot call
2085        MatCopy() directly on the two parts. If need be, a copy more efficient than
2086        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2087        and then copying the submatrices */
2088     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2089   } else {
2090     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2091     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2092   }
2093   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2098 {
2099   PetscErrorCode ierr;
2100 
2101   PetscFunctionBegin;
2102   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 /*
2107    Computes the number of nonzeros per row needed for preallocation when X and Y
2108    have different nonzero structure.
2109 */
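     /*
        A small worked example (hypothetical indices): if a row of X holds global
        columns {0,3,5} and the same row of Y holds {3,4}, the merged pattern is
        {0,3,4,5}, so nnz for that row is 4; the shared column 3 is counted once.
     */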
2110 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2111 {
2112   PetscInt       i,j,k,nzx,nzy;
2113 
2114   PetscFunctionBegin;
2115   /* Set the number of nonzeros in the new matrix */
2116   for (i=0; i<m; i++) {
2117     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2118     nzx = xi[i+1] - xi[i];
2119     nzy = yi[i+1] - yi[i];
2120     nnz[i] = 0;
2121     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2122       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2123       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2124       nnz[i]++;
2125     }
2126     for (; k<nzy; k++) nnz[i]++;
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2132 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2133 {
2134   PetscErrorCode ierr;
2135   PetscInt       m = Y->rmap->N;
2136   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2137   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2138 
2139   PetscFunctionBegin;
2140   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2145 {
2146   PetscErrorCode ierr;
2147   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2148   PetscBLASInt   bnz,one=1;
2149   Mat_SeqAIJ     *x,*y;
2150 
2151   PetscFunctionBegin;
2152   if (str == SAME_NONZERO_PATTERN) {
2153     PetscScalar alpha = a;
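         /* conformal nonzero patterns: the value arrays of X and Y line up entry
            by entry, so the update reduces to one BLAS axpy call per block */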
2154     x    = (Mat_SeqAIJ*)xx->A->data;
2155     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2156     y    = (Mat_SeqAIJ*)yy->A->data;
2157     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2158     x    = (Mat_SeqAIJ*)xx->B->data;
2159     y    = (Mat_SeqAIJ*)yy->B->data;
2160     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2161     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2162     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2163   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2164     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2165   } else {
2166     Mat      B;
2167     PetscInt *nnz_d,*nnz_o;
2168     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2169     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2170     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2171     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2172     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2173     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2174     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2175     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2176     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2177     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2178     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2179     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2180     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2181     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2182   }
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2187 
2188 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2189 {
2190 #if defined(PETSC_USE_COMPLEX)
2191   PetscErrorCode ierr;
2192   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2193 
2194   PetscFunctionBegin;
2195   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2196   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2197 #else
2198   PetscFunctionBegin;
2199 #endif
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207 
2208   PetscFunctionBegin;
2209   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2210   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2215 {
2216   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2217   PetscErrorCode ierr;
2218 
2219   PetscFunctionBegin;
2220   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2221   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2226 {
2227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2228   PetscErrorCode ierr;
2229   PetscInt       i,*idxb = 0;
2230   PetscScalar    *va,*vb;
2231   Vec            vtmp;
2232 
2233   PetscFunctionBegin;
2234   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2235   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2236   if (idx) {
2237     for (i=0; i<A->rmap->n; i++) {
2238       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2239     }
2240   }
2241 
2242   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2243   if (idx) {
2244     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2245   }
2246   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2247   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2248 
2249   for (i=0; i<A->rmap->n; i++) {
2250     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2251       va[i] = vb[i];
2252       if (idx) idx[i] = a->garray[idxb[i]];
2253     }
2254   }
2255 
2256   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2257   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2258   ierr = PetscFree(idxb);CHKERRQ(ierr);
2259   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2264 {
2265   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2266   PetscErrorCode ierr;
2267   PetscInt       i,*idxb = 0;
2268   PetscScalar    *va,*vb;
2269   Vec            vtmp;
2270 
2271   PetscFunctionBegin;
2272   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2273   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2274   if (idx) {
2275     for (i=0; i<A->rmap->n; i++) { /* one entry of idx[] per local row, as in MatGetRowMaxAbs_MPIAIJ() */
2276       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2277     }
2278   }
2279 
2280   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2281   if (idx) {
2282     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2283   }
2284   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2285   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2286 
2287   for (i=0; i<A->rmap->n; i++) {
2288     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2289       va[i] = vb[i];
2290       if (idx) idx[i] = a->garray[idxb[i]];
2291     }
2292   }
2293 
2294   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2295   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2296   ierr = PetscFree(idxb);CHKERRQ(ierr);
2297   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2302 {
2303   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2304   PetscInt       n      = A->rmap->n;
2305   PetscInt       cstart = A->cmap->rstart;
2306   PetscInt       *cmap  = mat->garray;
2307   PetscInt       *diagIdx, *offdiagIdx;
2308   Vec            diagV, offdiagV;
2309   PetscScalar    *a, *diagA, *offdiagA;
2310   PetscInt       r;
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2315   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* VecCreateSeq() requires a one-process communicator */
2316   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2317   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2318   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2319   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2320   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2321   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2322   for (r = 0; r < n; ++r) {
2323     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2324       a[r]   = diagA[r];
2325       idx[r] = cstart + diagIdx[r];
2326     } else {
2327       a[r]   = offdiagA[r];
2328       idx[r] = cmap[offdiagIdx[r]];
2329     }
2330   }
2331   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2332   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2333   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2334   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2335   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2336   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2343   PetscInt       n      = A->rmap->n;
2344   PetscInt       cstart = A->cmap->rstart;
2345   PetscInt       *cmap  = mat->garray;
2346   PetscInt       *diagIdx, *offdiagIdx;
2347   Vec            diagV, offdiagV;
2348   PetscScalar    *a, *diagA, *offdiagA;
2349   PetscInt       r;
2350   PetscErrorCode ierr;
2351 
2352   PetscFunctionBegin;
2353   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2354   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2355   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2356   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2357   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2359   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2360   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2361   for (r = 0; r < n; ++r) {
2362     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2363       a[r]   = diagA[r];
2364       idx[r] = cstart + diagIdx[r];
2365     } else {
2366       a[r]   = offdiagA[r];
2367       idx[r] = cmap[offdiagIdx[r]];
2368     }
2369   }
2370   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2371   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2373   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2374   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2375   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2380 {
2381   PetscErrorCode ierr;
2382   Mat            *dummy;
2383 
2384   PetscFunctionBegin;
2385   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2386   *newmat = *dummy;
2387   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2392 {
2393   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
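       /* the (point-)block diagonal of a square MPIAIJ matrix is contained in the
          local diagonal block a->A, so the sequential routine suffices */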
2397   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2398   A->factorerrortype = a->A->factorerrortype;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2403 {
2404   PetscErrorCode ierr;
2405   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2406 
2407   PetscFunctionBegin;
2408   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2409   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2410   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2411   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2416 {
2417   PetscFunctionBegin;
2418   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2419   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 /*@
2424    MatMPIAIJSetUseScalableIncreaseOverlap - Determines whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2425 
2426    Collective on Mat
2427 
2428    Input Parameters:
2429 +    A - the matrix
2430 -    sc - PETSC_TRUE to use the scalable algorithm (the default is not to use it)
2431 
2432    Level: advanced
2433 
2434 @*/
2435 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2436 {
2437   PetscErrorCode       ierr;
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2441   PetscFunctionReturn(0);
2442 }
2443 
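     /*
        A minimal usage sketch (assumes A is an already created MATMPIAIJ matrix,
        with ierr/CHKERRQ error handling as elsewhere in this file):
 
          ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
 
        The same switch is exposed in the options database as
        -mat_increase_overlap_scalable, handled in MatSetFromOptions_MPIAIJ() below.
     */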
2444 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2445 {
2446   PetscErrorCode       ierr;
2447   PetscBool            sc = PETSC_FALSE,flg;
2448 
2449   PetscFunctionBegin;
2450   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2452   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2453   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2454   if (flg) {
2455     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2456   }
2457   ierr = PetscOptionsTail();CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2462 {
2463   PetscErrorCode ierr;
2464   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2465   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2466 
2467   PetscFunctionBegin;
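       /* make sure diagonal entries can be inserted: if Y was never preallocated,
          or its diagonal block holds no nonzeros yet, reserve one entry per local
          row first */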
2468   if (!Y->preallocated) {
2469     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2470   } else if (!aij->nz) {
2471     PetscInt nonew = aij->nonew;
2472     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2473     aij->nonew = nonew;
2474   }
2475   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2480 {
2481   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
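       /* the diagonal of a square MPIAIJ matrix lives in the local block a->A;
          only the reported index d needs shifting into the global row numbering */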
2486   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2487   if (d) {
2488     PetscInt rstart;
2489     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2490     *d += rstart;
2492   }
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 
2497 /* -------------------------------------------------------------------*/
2498 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2499                                        MatGetRow_MPIAIJ,
2500                                        MatRestoreRow_MPIAIJ,
2501                                        MatMult_MPIAIJ,
2502                                 /* 4*/ MatMultAdd_MPIAIJ,
2503                                        MatMultTranspose_MPIAIJ,
2504                                        MatMultTransposeAdd_MPIAIJ,
2505                                        0,
2506                                        0,
2507                                        0,
2508                                 /*10*/ 0,
2509                                        0,
2510                                        0,
2511                                        MatSOR_MPIAIJ,
2512                                        MatTranspose_MPIAIJ,
2513                                 /*15*/ MatGetInfo_MPIAIJ,
2514                                        MatEqual_MPIAIJ,
2515                                        MatGetDiagonal_MPIAIJ,
2516                                        MatDiagonalScale_MPIAIJ,
2517                                        MatNorm_MPIAIJ,
2518                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2519                                        MatAssemblyEnd_MPIAIJ,
2520                                        MatSetOption_MPIAIJ,
2521                                        MatZeroEntries_MPIAIJ,
2522                                 /*24*/ MatZeroRows_MPIAIJ,
2523                                        0,
2524                                        0,
2525                                        0,
2526                                        0,
2527                                 /*29*/ MatSetUp_MPIAIJ,
2528                                        0,
2529                                        0,
2530                                        MatGetDiagonalBlock_MPIAIJ,
2531                                        0,
2532                                 /*34*/ MatDuplicate_MPIAIJ,
2533                                        0,
2534                                        0,
2535                                        0,
2536                                        0,
2537                                 /*39*/ MatAXPY_MPIAIJ,
2538                                        MatCreateSubMatrices_MPIAIJ,
2539                                        MatIncreaseOverlap_MPIAIJ,
2540                                        MatGetValues_MPIAIJ,
2541                                        MatCopy_MPIAIJ,
2542                                 /*44*/ MatGetRowMax_MPIAIJ,
2543                                        MatScale_MPIAIJ,
2544                                        MatShift_MPIAIJ,
2545                                        MatDiagonalSet_MPIAIJ,
2546                                        MatZeroRowsColumns_MPIAIJ,
2547                                 /*49*/ MatSetRandom_MPIAIJ,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2553                                        0,
2554                                        MatSetUnfactored_MPIAIJ,
2555                                        MatPermute_MPIAIJ,
2556                                        0,
2557                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2558                                        MatDestroy_MPIAIJ,
2559                                        MatView_MPIAIJ,
2560                                        0,
2561                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2562                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2563                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2564                                        0,
2565                                        0,
2566                                        0,
2567                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2568                                        MatGetRowMinAbs_MPIAIJ,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                 /*75*/ MatFDColoringApply_AIJ,
2574                                        MatSetFromOptions_MPIAIJ,
2575                                        0,
2576                                        0,
2577                                        MatFindZeroDiagonals_MPIAIJ,
2578                                 /*80*/ 0,
2579                                        0,
2580                                        0,
2581                                 /*83*/ MatLoad_MPIAIJ,
2582                                        MatIsSymmetric_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2588                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2589                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2590                                        MatPtAP_MPIAIJ_MPIAIJ,
2591                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2592                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2593                                        0,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                 /*99*/ 0,
2598                                        0,
2599                                        0,
2600                                        MatConjugate_MPIAIJ,
2601                                        0,
2602                                 /*104*/MatSetValuesRow_MPIAIJ,
2603                                        MatRealPart_MPIAIJ,
2604                                        MatImaginaryPart_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                 /*109*/0,
2608                                        0,
2609                                        MatGetRowMin_MPIAIJ,
2610                                        0,
2611                                        MatMissingDiagonal_MPIAIJ,
2612                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2613                                        0,
2614                                        MatGetGhosts_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                 /*119*/0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                        MatGetMultiProcBlock_MPIAIJ,
2622                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2623                                        MatGetColumnNorms_MPIAIJ,
2624                                        MatInvertBlockDiagonal_MPIAIJ,
2625                                        0,
2626                                        MatCreateSubMatricesMPI_MPIAIJ,
2627                                 /*129*/0,
2628                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2629                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2630                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2631                                        0,
2632                                 /*134*/0,
2633                                        0,
2634                                        MatRARt_MPIAIJ_MPIAIJ,
2635                                        0,
2636                                        0,
2637                                 /*139*/MatSetBlockSizes_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        MatFDColoringSetUp_MPIXAIJ,
2641                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2642                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2643 };
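/*
   Editor's note: the table above populates mat->ops for MATMPIAIJ; zero entries mark
   operations this type does not implement. Generic interface routines dispatch through
   the table, so (as a sketch) MatMult(mat,x,y) ends up doing roughly

     ierr = (*mat->ops->mult)(mat,x,y);CHKERRQ(ierr);

   which lands in MatMult_MPIAIJ for this type.
*/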
2644 
2645 /* ----------------------------------------------------------------------------------------*/
2646 
2647 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2648 {
2649   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2650   PetscErrorCode ierr;
2651 
2652   PetscFunctionBegin;
2653   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2654   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2659 {
2660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2661   PetscErrorCode ierr;
2662 
2663   PetscFunctionBegin;
2664   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2665   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2666   PetscFunctionReturn(0);
2667 }
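/*
   Editor's note: a hedged sketch of the intended use of MatStoreValues()/MatRetrieveValues(),
   e.g. to reuse the constant part of a Jacobian J (J is a caller-owned, assembled matrix here).
   MatStoreValues() requires that no new nonzero locations be created afterwards:

     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(J);CHKERRQ(ierr);      stash the assembled values
     ...
     ierr = MatRetrieveValues(J);CHKERRQ(ierr);   restore them before the next reassembly
*/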
2668 
2669 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2670 {
2671   Mat_MPIAIJ     *b;
2672   PetscErrorCode ierr;
2673 
2674   PetscFunctionBegin;
2675   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2676   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2677   b = (Mat_MPIAIJ*)B->data;
2678 
2679 #if defined(PETSC_USE_CTABLE)
2680   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2681 #else
2682   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2683 #endif
2684   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2685   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2686   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2687 
2688   /* Because B will have been resized, we simply destroy it and create a new one each time */
2689   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2690   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2691   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2692   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2693   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2694   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2695 
2696   if (!B->preallocated) {
2697     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2698     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2699     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2700     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2701     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2702   }
2703 
2704   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2705   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2706   B->preallocated  = PETSC_TRUE;
2707   B->was_assembled = PETSC_FALSE;
2708   B->assembled     = PETSC_FALSE;
2709   PetscFunctionReturn(0);
2710 }
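/*
   Editor's note: a caller-side sketch of MatMPIAIJSetPreallocation(); the local size 3 and
   the per-row counts below are made up for illustration.

     Mat      A;
     PetscInt d_nnz[3] = {2,1,3};   nonzeros per local row in the diagonal block
     PetscInt o_nnz[3] = {1,0,2};   nonzeros per local row in the off-diagonal block

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   Either the scalar d_nz/o_nz or the arrays may be given; a non-NULL array takes precedence
   over the corresponding scalar.
*/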
2711 
2712 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2713 {
2714   Mat_MPIAIJ     *b;
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2719   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2720   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2721   b = (Mat_MPIAIJ*)B->data;
2722 
2723 #if defined(PETSC_USE_CTABLE)
2724   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2725 #else
2726   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2727 #endif
2728   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2729   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2730   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2731 
2732   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2733   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2734   B->preallocated  = PETSC_TRUE;
2735   B->was_assembled = PETSC_FALSE;
2736   B->assembled = PETSC_FALSE;
2737   PetscFunctionReturn(0);
2738 }
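/*
   Editor's note: a hedged sketch of how MatResetPreallocation() is meant to be used: after a
   matrix has been assembled, it restores the original preallocation so the same matrix can be
   refilled without mallocs for new nonzero locations.

     ierr = MatResetPreallocation(A);CHKERRQ(ierr);
     ... then MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() again ...
*/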
2739 
2740 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2741 {
2742   Mat            mat;
2743   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   *newmat = 0;
2748   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2749   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2750   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2751   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2752   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2753   a       = (Mat_MPIAIJ*)mat->data;
2754 
2755   mat->factortype   = matin->factortype;
2756   mat->assembled    = PETSC_TRUE;
2757   mat->insertmode   = NOT_SET_VALUES;
2758   mat->preallocated = PETSC_TRUE;
2759 
2760   a->size         = oldmat->size;
2761   a->rank         = oldmat->rank;
2762   a->donotstash   = oldmat->donotstash;
2763   a->roworiented  = oldmat->roworiented;
2764   a->rowindices   = 0;
2765   a->rowvalues    = 0;
2766   a->getrowactive = PETSC_FALSE;
2767 
2768   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2769   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2770 
2771   if (oldmat->colmap) {
2772 #if defined(PETSC_USE_CTABLE)
2773     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2774 #else
2775     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2776     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2777     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2778 #endif
2779   } else a->colmap = 0;
2780   if (oldmat->garray) {
2781     PetscInt len;
2782     len  = oldmat->B->cmap->n;
2783     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2784     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2785     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2786   } else a->garray = 0;
2787 
2788   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2789   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2790   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2791   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2792 
2793   if (oldmat->Mvctx_mpi1) {
2794     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2795     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2796   }
2797 
2798   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2799   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2800   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2801   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2802   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2803   *newmat = mat;
2804   PetscFunctionReturn(0);
2805 }
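/*
   Editor's note: MatDuplicate() as seen from user code (a sketch); B inherits A's parallel
   layout and nonzero structure.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);

   Pass MAT_DO_NOT_COPY_VALUES instead to copy only the structure, or MAT_SHARE_NONZERO_PATTERN
   to share the nonzero pattern between A and B.
*/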
2806 
2807 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2808 {
2809   PetscScalar    *vals,*svals;
2810   MPI_Comm       comm;
2811   PetscErrorCode ierr;
2812   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2813   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2814   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2815   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2816   PetscInt       cend,cstart,n,*rowners;
2817   int            fd;
2818   PetscInt       bs = newMat->rmap->bs;
2819 
2820   PetscFunctionBegin;
2821   /* force binary viewer to load .info file if it has not yet done so */
2822   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2823   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2824   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2825   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2826   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2827   if (!rank) {
2828     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2829     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2830     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2831   }
2832 
2833   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2834   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2835   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2836   if (bs < 0) bs = 1;
2837 
2838   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2839   M    = header[1]; N = header[2];
2840 
2841   /* If global sizes are set, check if they are consistent with that given in the file */
2842   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2843   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2844 
2845   /* determine ownership of all (block) rows */
2846   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Number of rows (%D) is not divisible by the block size (%D)",M,bs);
2847   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2848   else m = newMat->rmap->n; /* Set by user */
2849 
2850   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2851   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2852 
2853   /* First process needs enough room for the process with the most rows */
2854   if (!rank) {
2855     mmax = rowners[1];
2856     for (i=2; i<=size; i++) {
2857       mmax = PetscMax(mmax, rowners[i]);
2858     }
2859   } else mmax = -1;             /* unused, but compilers complain */
2860 
2861   rowners[0] = 0;
2862   for (i=2; i<=size; i++) {
2863     rowners[i] += rowners[i-1];
2864   }
2865   rstart = rowners[rank];
2866   rend   = rowners[rank+1];
2867 
2868   /* distribute row lengths to all processors */
2869   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2870   if (!rank) {
2871     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2872     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2873     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2874     for (j=0; j<m; j++) {
2875       procsnz[0] += ourlens[j];
2876     }
2877     for (i=1; i<size; i++) {
2878       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2879       /* calculate the number of nonzeros on each processor */
2880       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2881         procsnz[i] += rowlengths[j];
2882       }
2883       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2884     }
2885     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2886   } else {
2887     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2888   }
2889 
2890   if (!rank) {
2891     /* determine max buffer needed and allocate it */
2892     maxnz = 0;
2893     for (i=0; i<size; i++) {
2894       maxnz = PetscMax(maxnz,procsnz[i]);
2895     }
2896     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2897 
2898     /* read in my part of the matrix column indices  */
2899     nz   = procsnz[0];
2900     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2901     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2902 
2903     /* read in everyone else's and ship off */
2904     for (i=1; i<size; i++) {
2905       nz   = procsnz[i];
2906       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2907       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2908     }
2909     ierr = PetscFree(cols);CHKERRQ(ierr);
2910   } else {
2911     /* determine buffer space needed for message */
2912     nz = 0;
2913     for (i=0; i<m; i++) {
2914       nz += ourlens[i];
2915     }
2916     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2917 
2918     /* receive message of column indices */
2919     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2920   }
2921 
2922   /* determine column ownership if matrix is not square */
2923   if (N != M) {
2924     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2925     else n = newMat->cmap->n;
2926     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2927     cstart = cend - n;
2928   } else {
2929     cstart = rstart;
2930     cend   = rend;
2931     n      = cend - cstart;
2932   }
2933 
2934   /* loop over local rows, determining the number of off-diagonal entries */
2935   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2936   jj   = 0;
2937   for (i=0; i<m; i++) {
2938     for (j=0; j<ourlens[i]; j++) {
2939       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2940       jj++;
2941     }
2942   }
2943 
2944   for (i=0; i<m; i++) {
2945     ourlens[i] -= offlens[i];
2946   }
2947   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2948 
2949   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2950 
2951   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2952 
2953   for (i=0; i<m; i++) {
2954     ourlens[i] += offlens[i];
2955   }
2956 
2957   if (!rank) {
2958     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2959 
2960     /* read in my part of the matrix numerical values  */
2961     nz   = procsnz[0];
2962     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2963 
2964     /* insert into matrix */
2965     jj      = rstart;
2966     smycols = mycols;
2967     svals   = vals;
2968     for (i=0; i<m; i++) {
2969       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2970       smycols += ourlens[i];
2971       svals   += ourlens[i];
2972       jj++;
2973     }
2974 
2975     /* read in other processors and ship out */
2976     for (i=1; i<size; i++) {
2977       nz   = procsnz[i];
2978       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2979       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2980     }
2981     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2982   } else {
2983     /* receive numeric values */
2984     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2985 
2986     /* receive message of values */
2987     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2988 
2989     /* insert into matrix */
2990     jj      = rstart;
2991     smycols = mycols;
2992     svals   = vals;
2993     for (i=0; i<m; i++) {
2994       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2995       smycols += ourlens[i];
2996       svals   += ourlens[i];
2997       jj++;
2998     }
2999   }
3000   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3001   ierr = PetscFree(vals);CHKERRQ(ierr);
3002   ierr = PetscFree(mycols);CHKERRQ(ierr);
3003   ierr = PetscFree(rowners);CHKERRQ(ierr);
3004   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3005   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3006   PetscFunctionReturn(0);
3007 }
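/*
   Editor's note: a minimal sketch of loading an MPIAIJ matrix through the routine above;
   "matrix.dat" is a hypothetical file written earlier with MatView() on a binary viewer.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The -matload_block_size option handled above can be used to set bs at load time.
*/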
3008 
3009 /* Not scalable because of ISAllGather() unless getting all columns. */
3010 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3011 {
3012   PetscErrorCode ierr;
3013   IS             iscol_local;
3014   PetscBool      isstride;
3015   PetscMPIInt    lisstride=0,gisstride;
3016 
3017   PetscFunctionBegin;
3018   /* check if we are grabbing all columns */
3019   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3020 
3021   if (isstride) {
3022     PetscInt  start,len,mstart,mlen;
3023     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3024     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3025     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3026     if (mstart == start && mlen-mstart == len) lisstride = 1;
3027   }
3028 
3029   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3030   if (gisstride) {
3031     PetscInt N;
3032     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3033     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3034     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3035     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3036   } else {
3037     PetscInt cbs;
3038     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3039     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3040     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3041   }
3042 
3043   *isseq = iscol_local;
3044   PetscFunctionReturn(0);
3045 }
3046 
3047 /*
3048  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3049  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3050 
3051  Input Parameters:
3052    mat - matrix
3053    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3054            i.e., mat->rstart <= isrow[i] < mat->rend
3055    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3056            i.e., mat->cstart <= iscol[i] < mat->cend
3057  Output Parameters:
3058    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3059    iscol_o - sequential column index set for retrieving mat->B
3060    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3061  */
3062 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3063 {
3064   PetscErrorCode ierr;
3065   Vec            x,cmap;
3066   const PetscInt *is_idx;
3067   PetscScalar    *xarray,*cmaparray;
3068   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3069   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3070   Mat            B=a->B;
3071   Vec            lvec=a->lvec,lcmap;
3072   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3073   MPI_Comm       comm;
3074   VecScatter     Mvctx=a->Mvctx;
3075 
3076   PetscFunctionBegin;
3077   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3078   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3079 
3080   /* (1) iscol selects a subset of the columns of mat; mark them in a full-length vector x, padding the remaining entries with -1 */
3081   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3082   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3083   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3084   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3085 
3086   /* Get start indices */
3087   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3088   isstart -= ncols;
3089   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3090 
3091   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3092   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3093   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3094   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3095   for (i=0; i<ncols; i++) {
3096     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3097     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3098     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3099   }
3100   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3101   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3102   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3103 
3104   /* Get iscol_d */
3105   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3106   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3107   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3108 
3109   /* Get isrow_d */
3110   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3111   rstart = mat->rmap->rstart;
3112   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3113   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3114   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3115   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3116 
3117   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3118   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3119   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3120 
3121   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3122   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3123   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3124 
3125   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3126 
3127   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3128   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3129 
3130   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3131   /* off-process column indices */
3132   count = 0;
3133   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3134   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3135 
3136   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3137   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3138   for (i=0; i<Bn; i++) {
3139     if (PetscRealPart(xarray[i]) > -1.0) {
3140       idx[count]     = i;                   /* local column index in off-diagonal part B */
3141       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3142       count++;
3143     }
3144   }
3145   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3146   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3147 
3148   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3149   /* cannot ensure iscol_o has same blocksize as iscol! */
3150 
3151   ierr = PetscFree(idx);CHKERRQ(ierr);
3152   *garray = cmap1;
3153 
3154   ierr = VecDestroy(&x);CHKERRQ(ierr);
3155   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3156   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3161 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3162 {
3163   PetscErrorCode ierr;
3164   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3165   Mat            M = NULL;
3166   MPI_Comm       comm;
3167   IS             iscol_d,isrow_d,iscol_o;
3168   Mat            Asub = NULL,Bsub = NULL;
3169   PetscInt       n;
3170 
3171   PetscFunctionBegin;
3172   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3173 
3174   if (call == MAT_REUSE_MATRIX) {
3175     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3176     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3177     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3178 
3179     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3180     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3181 
3182     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3183     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3184 
3185     /* Update diagonal and off-diagonal portions of submat */
3186     asub = (Mat_MPIAIJ*)(*submat)->data;
3187     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3188     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3189     if (n) {
3190       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3191     }
3192     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3193     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3194 
3195   } else { /* call == MAT_INITIAL_MATRIX */
3196     const PetscInt *garray;
3197     PetscInt        BsubN;
3198 
3199     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3200     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3201 
3202     /* Create local submatrices Asub and Bsub */
3203     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3204     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3205 
3206     /* Create submatrix M */
3207     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3208 
3209     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3210     asub = (Mat_MPIAIJ*)M->data;
3211 
3212     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3213     n = asub->B->cmap->N;
3214     if (BsubN > n) {
3215       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3216       const PetscInt *idx;
3217       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3218       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3219 
3220       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3221       j = 0;
3222       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3223       for (i=0; i<n; i++) {
3224         if (j >= BsubN) break;
3225         while (subgarray[i] > garray[j]) j++;
3226 
3227         if (subgarray[i] == garray[j]) {
3228           idx_new[i] = idx[j++];
3229         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3230       }
3231       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3232 
3233       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3234       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3235 
3236     } else if (BsubN < n) {
3237       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
3238     }
3239 
3240     ierr = PetscFree(garray);CHKERRQ(ierr);
3241     *submat = M;
3242 
3243     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3244     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3245     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3246 
3247     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3248     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3249 
3250     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3251     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3252   }
3253   PetscFunctionReturn(0);
3254 }
3255 
3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3257 {
3258   PetscErrorCode ierr;
3259   IS             iscol_local=NULL,isrow_d;
3260   PetscInt       csize;
3261   PetscInt       n,i,j,start,end;
3262   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3263   MPI_Comm       comm;
3264 
3265   PetscFunctionBegin;
3266   /* If isrow has same processor distribution as mat,
3267      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3268   if (call == MAT_REUSE_MATRIX) {
3269     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3270     if (isrow_d) {
3271       sameRowDist  = PETSC_TRUE;
3272       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3273     } else {
3274       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3275       if (iscol_local) {
3276         sameRowDist  = PETSC_TRUE;
3277         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3278       }
3279     }
3280   } else {
3281     /* Check if isrow has same processor distribution as mat */
3282     sameDist[0] = PETSC_FALSE;
3283     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3284     if (!n) {
3285       sameDist[0] = PETSC_TRUE;
3286     } else {
3287       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3288       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3289       if (i >= start && j < end) {
3290         sameDist[0] = PETSC_TRUE;
3291       }
3292     }
3293 
3294     /* Check if iscol has same processor distribution as mat */
3295     sameDist[1] = PETSC_FALSE;
3296     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3297     if (!n) {
3298       sameDist[1] = PETSC_TRUE;
3299     } else {
3300       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3301       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3302       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3303     }
3304 
3305     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3306     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3307     sameRowDist = tsameDist[0];
3308   }
3309 
3310   if (sameRowDist) {
3311     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3312       /* isrow and iscol have same processor distribution as mat */
3313       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3314       PetscFunctionReturn(0);
3315     } else { /* sameRowDist */
3316       /* isrow has same processor distribution as mat */
3317       if (call == MAT_INITIAL_MATRIX) {
3318         PetscBool sorted;
3319         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3320         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3321         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3322         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3323 
3324         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3325         if (sorted) {
3326           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3327           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3328           PetscFunctionReturn(0);
3329         }
3330       } else { /* call == MAT_REUSE_MATRIX */
3331         IS    iscol_sub;
3332         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3333         if (iscol_sub) {
3334           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3335           PetscFunctionReturn(0);
3336         }
3337       }
3338     }
3339   }
3340 
3341   /* General case: iscol -> iscol_local which has global size of iscol */
3342   if (call == MAT_REUSE_MATRIX) {
3343     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3344     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3345   } else {
3346     if (!iscol_local) {
3347       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3348     }
3349   }
3350 
3351   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3352   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3353 
3354   if (call == MAT_INITIAL_MATRIX) {
3355     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3356     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3357   }
3358   PetscFunctionReturn(0);
3359 }
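/*
   Editor's note: a hedged sketch of the public entry point that reaches the routine above.
   nloc and first are caller-chosen per-process sizes/offsets, not taken from this file.

     IS  isrow,iscol;
     Mat S;

     ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call relies on the objects stashed on S ("isrow_d", "SubMatrix",
   "ISAllGather", ...) by the MAT_INITIAL_MATRIX call.
*/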
3360 
3361 /*@C
3362      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3363          and "off-diagonal" parts of the matrix in CSR format.
3364 
3365    Collective on MPI_Comm
3366 
3367    Input Parameters:
3368 +  comm - MPI communicator
3369 .  A - "diagonal" portion of matrix
3370 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3371 -  garray - global index of B columns
3372 
3373    Output Parameter:
3374 .   mat - the matrix, with input A as its local diagonal matrix
3375    Level: advanced
3376 
3377    Notes:
3378        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3379        A becomes part of the output mat, and B is destroyed by this routine. The caller may not use A or B afterwards.
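
       Example usage (an editor's sketch; A, B and garray are assumed to satisfy the layout rules above):
$      Mat mat;
$      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&mat);CHKERRQ(ierr);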
3380 
3381 .seealso: MatCreateMPIAIJWithSplitArrays()
3382 @*/
3383 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3384 {
3385   PetscErrorCode ierr;
3386   Mat_MPIAIJ     *maij;
3387   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3388   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3389   PetscScalar    *oa=b->a;
3390   Mat            Bnew;
3391   PetscInt       m,n,N;
3392 
3393   PetscFunctionBegin;
3394   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3395   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3396   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3397   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3398   /* remove check below; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3399   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3400 
3401   /* Get global columns of mat */
3402   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3403 
3404   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3405   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3406   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3407   maij = (Mat_MPIAIJ*)(*mat)->data;
3408 
3409   (*mat)->preallocated = PETSC_TRUE;
3410 
3411   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3412   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3413 
3414   /* Set A as diagonal portion of *mat */
3415   maij->A = A;
3416 
3417   nz = oi[m];
3418   for (i=0; i<nz; i++) {
3419     col   = oj[i];
3420     oj[i] = garray[col];
3421   }
3422 
3423    /* Set Bnew as off-diagonal portion of *mat */
3424   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3425   bnew        = (Mat_SeqAIJ*)Bnew->data;
3426   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3427   maij->B     = Bnew;
3428 
3429   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3430 
3431   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3432   b->free_a       = PETSC_FALSE;
3433   b->free_ij      = PETSC_FALSE;
3434   ierr = MatDestroy(&B);CHKERRQ(ierr);
3435 
3436   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3437   bnew->free_a       = PETSC_TRUE;
3438   bnew->free_ij      = PETSC_TRUE;
3439 
3440   /* condense columns of maij->B */
3441   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3442   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3443   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3444   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3445   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3446   PetscFunctionReturn(0);
3447 }
3448 
3449 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3450 
3451 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3452 {
3453   PetscErrorCode ierr;
3454   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3455   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3456   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3457   Mat            M,Msub,B=a->B;
3458   MatScalar      *aa;
3459   Mat_SeqAIJ     *aij;
3460   PetscInt       *garray = a->garray,*colsub,Ncols;
3461   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3462   IS             iscol_sub,iscmap;
3463   const PetscInt *is_idx,*cmap;
3464   PetscBool      allcolumns=PETSC_FALSE;
3465   MPI_Comm       comm;
3466 
3467   PetscFunctionBegin;
3468   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3469 
3470   if (call == MAT_REUSE_MATRIX) {
3471     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3472     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3473     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3474 
3475     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3476     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3477 
3478     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3479     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3480 
3481     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3482 
3483   } else { /* call == MAT_INITIAL_MATRIX */
3484     PetscBool flg;
3485 
3486     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3487     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3488 
3489     /* (1) iscol -> nonscalable iscol_local */
3490     /* Check for special case: each processor gets entire matrix columns */
3491     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3492     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3493     if (allcolumns) {
3494       iscol_sub = iscol_local;
3495       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3496       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3497 
3498     } else {
3499       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3500       PetscInt *idx,*cmap1,k;
3501       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3502       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3503       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3504       count = 0;
3505       k     = 0;
3506       for (i=0; i<Ncols; i++) {
3507         j = is_idx[i];
3508         if (j >= cstart && j < cend) {
3509           /* diagonal part of mat */
3510           idx[count]     = j;
3511           cmap1[count++] = i; /* column index in submat */
3512         } else if (Bn) {
3513           /* off-diagonal part of mat */
3514           if (j == garray[k]) {
3515             idx[count]     = j;
3516             cmap1[count++] = i;  /* column index in submat */
3517           } else if (j > garray[k]) {
3518             while (j > garray[k] && k < Bn-1) k++;
3519             if (j == garray[k]) {
3520               idx[count]     = j;
3521               cmap1[count++] = i; /* column index in submat */
3522             }
3523           }
3524         }
3525       }
3526       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3527 
3528       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3529       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3530       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3531 
3532       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3533     }
3534 
3535     /* (3) Create sequential Msub */
3536     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3537   }
3538 
3539   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3540   aij  = (Mat_SeqAIJ*)(Msub)->data;
3541   ii   = aij->i;
3542   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3543 
3544   /*
3545       m - number of local rows
3546       Ncols - number of columns (same on all processors)
3547       rstart - first row in new global matrix generated
3548   */
3549   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3550 
3551   if (call == MAT_INITIAL_MATRIX) {
3552     /* (4) Create parallel newmat */
3553     PetscMPIInt    rank,size;
3554     PetscInt       csize;
3555 
3556     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3557     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3558 
3559     /*
3560         Determine the number of non-zeros in the diagonal and off-diagonal
3561         portions of the matrix in order to do correct preallocation
3562     */
3563 
3564     /* first get start and end of "diagonal" columns */
3565     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3566     if (csize == PETSC_DECIDE) {
3567       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3568       if (mglobal == Ncols) { /* square matrix */
3569         nlocal = m;
3570       } else {
3571         nlocal = Ncols/size + ((Ncols % size) > rank);
3572       }
3573     } else {
3574       nlocal = csize;
3575     }
3576     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3577     rstart = rend - nlocal;
3578     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3579 
3580     /* next, compute all the lengths */
3581     jj    = aij->j;
3582     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3583     olens = dlens + m;
3584     for (i=0; i<m; i++) {
3585       jend = ii[i+1] - ii[i];
3586       olen = 0;
3587       dlen = 0;
3588       for (j=0; j<jend; j++) {
3589         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3590         else dlen++;
3591         jj++;
3592       }
3593       olens[i] = olen;
3594       dlens[i] = dlen;
3595     }
3596 
3597     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3598     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3599 
3600     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3601     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3602     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3603     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3604     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3605     ierr = PetscFree(dlens);CHKERRQ(ierr);
3606 
3607   } else { /* call == MAT_REUSE_MATRIX */
3608     M    = *newmat;
3609     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3610     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3611     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3612     /*
3613          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3614        rather than the slower MatSetValues().
3615     */
3616     M->was_assembled = PETSC_TRUE;
3617     M->assembled     = PETSC_FALSE;
3618   }
3619 
3620   /* (5) Set values of Msub to *newmat */
3621   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3622   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3623 
3624   jj   = aij->j;
3625   aa   = aij->a;
3626   for (i=0; i<m; i++) {
3627     row = rstart + i;
3628     nz  = ii[i+1] - ii[i];
3629     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3630     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3631     jj += nz; aa += nz;
3632   }
3633   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3634 
3635   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637 
3638   ierr = PetscFree(colsub);CHKERRQ(ierr);
3639 
3640   /* save Msub, iscol_sub and iscmap used in processor for next request */
3641   if (call ==  MAT_INITIAL_MATRIX) {
3642     *newmat = M;
3643     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3644     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3645 
3646     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3647     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3648 
3649     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3650     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3651 
3652     if (iscol_local) {
3653       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3654       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3655     }
3656   }
3657   PetscFunctionReturn(0);
3658 }
3659 
3660 /*
3661     Not great since it makes two copies of the submatrix: first a SeqAIJ
3662   locally, and then the final result by concatenating the local matrices.
3663   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3664 
3665   Note: This requires a sequential iscol with all indices.
3666 */
3667 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3668 {
3669   PetscErrorCode ierr;
3670   PetscMPIInt    rank,size;
3671   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3672   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3673   Mat            M,Mreuse;
3674   MatScalar      *aa,*vwork;
3675   MPI_Comm       comm;
3676   Mat_SeqAIJ     *aij;
3677   PetscBool      colflag,allcolumns=PETSC_FALSE;
3678 
3679   PetscFunctionBegin;
3680   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3681   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3682   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3683 
3684   /* Check for special case: each processor gets entire matrix columns */
3685   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3686   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3687   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3688 
3689   if (call ==  MAT_REUSE_MATRIX) {
3690     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3691     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3692     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3693   } else {
3694     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3695   }
3696 
3697   /*
3698       m - number of local rows
3699       n - number of columns (same on all processors)
3700       rstart - first row in new global matrix generated
3701   */
3702   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3703   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3704   if (call == MAT_INITIAL_MATRIX) {
3705     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3706     ii  = aij->i;
3707     jj  = aij->j;
3708 
3709     /*
3710         Determine the number of non-zeros in the diagonal and off-diagonal
3711         portions of the matrix in order to do correct preallocation
3712     */
3713 
3714     /* first get start and end of "diagonal" columns */
3715     if (csize == PETSC_DECIDE) {
3716       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3717       if (mglobal == n) { /* square matrix */
3718         nlocal = m;
3719       } else {
3720         nlocal = n/size + ((n % size) > rank);
3721       }
3722     } else {
3723       nlocal = csize;
3724     }
3725     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3726     rstart = rend - nlocal;
3727     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3728 
3729     /* next, compute all the lengths */
3730     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3731     olens = dlens + m;
3732     for (i=0; i<m; i++) {
3733       jend = ii[i+1] - ii[i];
3734       olen = 0;
3735       dlen = 0;
3736       for (j=0; j<jend; j++) {
3737         if (*jj < rstart || *jj >= rend) olen++;
3738         else dlen++;
3739         jj++;
3740       }
3741       olens[i] = olen;
3742       dlens[i] = dlen;
3743     }
3744     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3745     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3746     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3747     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3748     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3749     ierr = PetscFree(dlens);CHKERRQ(ierr);
3750   } else {
3751     PetscInt ml,nl;
3752 
3753     M    = *newmat;
3754     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3755     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3756     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3757     /*
3758          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3759        rather than the slower MatSetValues().
3760     */
3761     M->was_assembled = PETSC_TRUE;
3762     M->assembled     = PETSC_FALSE;
3763   }
3764   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3765   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3766   ii   = aij->i;
3767   jj   = aij->j;
3768   aa   = aij->a;
3769   for (i=0; i<m; i++) {
3770     row   = rstart + i;
3771     nz    = ii[i+1] - ii[i];
3772     cwork = jj;     jj += nz;
3773     vwork = aa;     aa += nz;
3774     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3775   }
3776 
3777   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3778   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3779   *newmat = M;
3780 
3781   /* save submatrix used in processor for next request */
3782   if (call ==  MAT_INITIAL_MATRIX) {
3783     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3784     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3785   }
3786   PetscFunctionReturn(0);
3787 }
3788 
3789 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3790 {
3791   PetscInt       m,cstart, cend,j,nnz,i,d;
3792   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3793   const PetscInt *JJ;
3794   PetscScalar    *values;
3795   PetscErrorCode ierr;
3796   PetscBool      nooffprocentries;
3797 
3798   PetscFunctionBegin;
3799   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3800 
3801   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3802   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3803   m      = B->rmap->n;
3804   cstart = B->cmap->rstart;
3805   cend   = B->cmap->rend;
3806   rstart = B->rmap->rstart;
3807 
3808   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3809 
3810 #if defined(PETSC_USE_DEBUG)
3811   for (i=0; i<m; i++) {
3812     nnz = Ii[i+1]- Ii[i];
3813     JJ  = J + Ii[i];
3814     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3815     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3816     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3817   }
3818 #endif
3819 
3820   for (i=0; i<m; i++) {
3821     nnz     = Ii[i+1]- Ii[i];
3822     JJ      = J + Ii[i];
3823     nnz_max = PetscMax(nnz_max,nnz);
3824     d       = 0;
3825     for (j=0; j<nnz; j++) {
3826       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3827     }
3828     d_nnz[i] = d;
3829     o_nnz[i] = nnz - d;
3830   }
3831   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3832   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3833 
3834   if (v) values = (PetscScalar*)v;
3835   else {
3836     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3837   }
3838 
3839   for (i=0; i<m; i++) {
3840     ii   = i + rstart;
3841     nnz  = Ii[i+1]- Ii[i];
3842     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3843   }
3844   nooffprocentries    = B->nooffprocentries;
3845   B->nooffprocentries = PETSC_TRUE;
3846   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3847   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3848   B->nooffprocentries = nooffprocentries;
3849 
3850   if (!v) {
3851     ierr = PetscFree(values);CHKERRQ(ierr);
3852   }
3853   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 /*@
3858    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3859    (the default parallel PETSc format).
3860 
3861    Collective on MPI_Comm
3862 
3863    Input Parameters:
3864 +  B - the matrix
3865 .  i - the indices into j for the start of each local row (starts with zero)
3866 .  j - the column indices for each local row (starts with zero)
3867 -  v - optional values in the matrix
3868 
3869    Level: developer
3870 
3871    Notes:
3872        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3873      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3874      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3875 
3876        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3877 
3878        The format which is used for the sparse matrix input is equivalent to a
3879     row-major ordering, i.e. for the following matrix, the input data expected is
3880     as shown
3881 
3882 $        1 0 0
3883 $        2 0 3     P0
3884 $       -------
3885 $        4 5 6     P1
3886 $
3887 $     Process0 [P0]: rows_owned=[0,1]
3888 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3889 $        j =  {0,0,2}  [size = 3]
3890 $        v =  {1,2,3}  [size = 3]
3891 $
3892 $     Process1 [P1]: rows_owned=[2]
3893 $        i =  {0,3}    [size = nrow+1  = 1+1]
3894 $        j =  {0,1,2}  [size = 3]
3895 $        v =  {4,5,6}  [size = 3]
3896 
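   A minimal calling sketch, assuming the local arrays i, j, and v have been
   filled as above (mlocal, nlocal, and comm are illustrative placeholders):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
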
3897 .keywords: matrix, aij, compressed row, sparse, parallel
3898 
3899 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3900           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3901 @*/
3902 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3903 {
3904   PetscErrorCode ierr;
3905 
3906   PetscFunctionBegin;
3907   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3908   PetscFunctionReturn(0);
3909 }
3910 
3911 /*@C
3912    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3913    (the default parallel PETSc format).  For good matrix assembly performance
3914    the user should preallocate the matrix storage by setting the parameters
3915    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3916    performance can be increased by more than a factor of 50.
3917 
3918    Collective on MPI_Comm
3919 
3920    Input Parameters:
3921 +  B - the matrix
3922 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3923            (same value is used for all local rows)
3924 .  d_nnz - array containing the number of nonzeros in the various rows of the
3925            DIAGONAL portion of the local submatrix (possibly different for each row)
3926            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3927            The size of this array is equal to the number of local rows, i.e 'm'.
3928            For matrices that will be factored, you must leave room for (and set)
3929            the diagonal entry even if it is zero.
3930 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3931            submatrix (same value is used for all local rows).
3932 -  o_nnz - array containing the number of nonzeros in the various rows of the
3933            OFF-DIAGONAL portion of the local submatrix (possibly different for
3934            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3935            structure. The size of this array is equal to the number
3936            of local rows, i.e 'm'.
3937 
3938    If the *_nnz parameter is given then the *_nz parameter is ignored
3939 
3940    The AIJ format (also called the Yale sparse matrix format or
3941    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3942    storage.  The stored row and column indices begin with zero.
3943    See Users-Manual: ch_mat for details.
3944 
3945    The parallel matrix is partitioned such that the first m0 rows belong to
3946    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3947    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
3948 
3949    The DIAGONAL portion of the local submatrix of a processor can be defined
3950    as the submatrix which is obtained by extracting the part corresponding to
3951    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3952    first row that belongs to the processor, r2 is the last row belonging to
3953    this processor, and c1-c2 is the range of indices of the local part of a
3954    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3955    common case of a square matrix, the row and column ranges are the same and
3956    the DIAGONAL part is also square. The remaining portion of the local
3957    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3958 
3961    You can call MatGetInfo() to get information on how effective the preallocation was;
3962    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3963    You can also run with the option -info and look for messages with the string
3964    malloc in them to see if additional memory allocation was needed.
3965 
3966    Example usage:
3967 
3968    Consider the following 8x8 matrix with 34 non-zero values that is
3969    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3970    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3971    as follows:
3972 
3973 .vb
3974             1  2  0  |  0  3  0  |  0  4
3975     Proc0   0  5  6  |  7  0  0  |  8  0
3976             9  0 10  | 11  0  0  | 12  0
3977     -------------------------------------
3978            13  0 14  | 15 16 17  |  0  0
3979     Proc1   0 18  0  | 19 20 21  |  0  0
3980             0  0  0  | 22 23  0  | 24  0
3981     -------------------------------------
3982     Proc2  25 26 27  |  0  0 28  | 29  0
3983            30  0  0  | 31 32 33  |  0 34
3984 .ve
3985 
3986    This can be represented as a collection of submatrices as:
3987 
3988 .vb
3989       A B C
3990       D E F
3991       G H I
3992 .ve
3993 
3994    Where the submatrices A,B,C are owned by proc0, D,E,F are
3995    owned by proc1, G,H,I are owned by proc2.
3996 
3997    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3998    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3999    The 'M','N' parameters are 8,8, and have the same values on all procs.
4000 
4001    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4002    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4003    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4004    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4005    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4006    matrix, and [DF] as another SeqAIJ matrix.
4007 
4008    When d_nz, o_nz parameters are specified, d_nz storage elements are
4009    allocated for every row of the local diagonal submatrix, and o_nz
4010    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4011    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4012    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4013    In this case, the values of d_nz,o_nz are:
4014 .vb
4015      proc0 : d_nz = 2, o_nz = 2
4016      proc1 : d_nz = 3, o_nz = 2
4017      proc2 : d_nz = 1, o_nz = 4
4018 .ve
4019    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4020    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4021    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4022    34 values.
4023 
4024    When d_nnz, o_nnz parameters are specified, the storage is specified
4025    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4026    In the above case the values for d_nnz,o_nnz are:
4027 .vb
4028      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4029      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4030      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4031 .ve
4032    Here the space allocated is the sum of all the above values, i.e. 34, and
4033    hence the pre-allocation is perfect.
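
   A minimal sketch of the per-row variant for a matrix B with m local rows;
   count_diag() and count_offdiag() are illustrative placeholders for the
   application's own counting of the nonzeros of row i in the DIAGONAL and
   OFF-DIAGONAL blocks:

.vb
     PetscInt i,*d_nnz,*o_nnz;
     PetscMalloc2(m,&d_nnz,m,&o_nnz);
     for (i=0; i<m; i++) {
       d_nnz[i] = count_diag(i);
       o_nnz[i] = count_offdiag(i);
     }
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
     PetscFree2(d_nnz,o_nnz);
.ve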
4034 
4035    Level: intermediate
4036 
4037 .keywords: matrix, aij, compressed row, sparse, parallel
4038 
4039 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4040           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4041 @*/
4042 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4043 {
4044   PetscErrorCode ierr;
4045 
4046   PetscFunctionBegin;
4047   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4048   PetscValidType(B,1);
4049   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4050   PetscFunctionReturn(0);
4051 }
4052 
4053 /*@
4054      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4055          in standard CSR format.
4056 
4057    Collective on MPI_Comm
4058 
4059    Input Parameters:
4060 +  comm - MPI communicator
4061 .  m - number of local rows (Cannot be PETSC_DECIDE)
4062 .  n - This value should be the same as the local size used in creating the
4063        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4064        calculated if N is given). For square matrices n is almost always m.
4065 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4066 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4067 .   i - row indices
4068 .   j - column indices
4069 -   a - matrix values
4070 
4071    Output Parameter:
4072 .   mat - the matrix
4073 
4074    Level: intermediate
4075 
4076    Notes:
4077        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4078      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4079      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4080 
4081        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4082 
4083        The format which is used for the sparse matrix input is equivalent to a
4084     row-major ordering, i.e. for the following matrix, the input data expected is
4085     as shown
4086 
4087 $        1 0 0
4088 $        2 0 3     P0
4089 $       -------
4090 $        4 5 6     P1
4091 $
4092 $     Process0 [P0]: rows_owned=[0,1]
4093 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4094 $        j =  {0,0,2}  [size = 3]
4095 $        v =  {1,2,3}  [size = 3]
4096 $
4097 $     Process1 [P1]: rows_owned=[2]
4098 $        i =  {0,3}    [size = nrow+1  = 1+1]
4099 $        j =  {0,1,2}  [size = 3]
4100 $        v =  {4,5,6}  [size = 3]
4101 
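   For instance, process P0 above could create its part of the matrix with the
   sketch below (P1 passes its own arrays analogously; comm is an illustrative
   placeholder):

.vb
     const PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     const PetscScalar v[] = {1,2,3};
     Mat               A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
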
4102 .keywords: matrix, aij, compressed row, sparse, parallel
4103 
4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4105           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4106 @*/
4107 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4108 {
4109   PetscErrorCode ierr;
4110 
4111   PetscFunctionBegin;
4112   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4113   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4114   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4115   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4116   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4117   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4118   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4119   PetscFunctionReturn(0);
4120 }
4121 
4122 /*@C
4123    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4124    (the default parallel PETSc format).  For good matrix assembly performance
4125    the user should preallocate the matrix storage by setting the parameters
4126    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4127    performance can be increased by more than a factor of 50.
4128 
4129    Collective on MPI_Comm
4130 
4131    Input Parameters:
4132 +  comm - MPI communicator
4133 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4134            This value should be the same as the local size used in creating the
4135            y vector for the matrix-vector product y = Ax.
4136 .  n - This value should be the same as the local size used in creating the
4137        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4138        calculated if N is given). For square matrices n is almost always m.
4139 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4140 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4141 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4142            (same value is used for all local rows)
4143 .  d_nnz - array containing the number of nonzeros in the various rows of the
4144            DIAGONAL portion of the local submatrix (possibly different for each row)
4145            or NULL, if d_nz is used to specify the nonzero structure.
4146            The size of this array is equal to the number of local rows, i.e 'm'.
4147 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4148            submatrix (same value is used for all local rows).
4149 -  o_nnz - array containing the number of nonzeros in the various rows of the
4150            OFF-DIAGONAL portion of the local submatrix (possibly different for
4151            each row) or NULL, if o_nz is used to specify the nonzero
4152            structure. The size of this array is equal to the number
4153            of local rows, i.e 'm'.
4154 
4155    Output Parameter:
4156 .  A - the matrix
4157 
4158    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4159    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4160    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4161 
4162    Notes:
4163    If the *_nnz parameter is given then the *_nz parameter is ignored
4164 
4165    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4166    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4167    storage requirements for this matrix.
4168 
4169    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4170    processor then it must be used on all processors that share the object for
4171    that argument.
4172 
4173    The user MUST specify either the local or global matrix dimensions
4174    (possibly both).
4175 
4176    The parallel matrix is partitioned across processors such that the
4177    first m0 rows belong to process 0, the next m1 rows belong to
4178    process 1, the next m2 rows belong to process 2, etc., where
4179    m0,m1,m2,... are the values of the input parameter 'm', i.e. each processor stores
4180    values corresponding to an [m x N] submatrix.
4181 
4182    The columns are logically partitioned with the n0 columns belonging
4183    to the 0th partition, the next n1 columns belonging to the next
4184    partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.
4185 
4186    The DIAGONAL portion of the local submatrix on any given processor
4187    is the submatrix formed by that processor's own m rows and n
4188    columns, i.e. the diagonal matrix on
4189    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4190    etc. The remaining portion of the local submatrix [m x (N-n)]
4191    constitutes the OFF-DIAGONAL portion. The example below better
4192    illustrates this concept.
4193 
4194    For a square global matrix we define each processor's diagonal portion
4195    to be its local rows and the corresponding columns (a square submatrix);
4196    each processor's off-diagonal portion encompasses the remainder of the
4197    local matrix (a rectangular submatrix).
4198 
4201    When calling this routine with a single process communicator, a matrix of
4202    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4203    type of communicator, use the construction mechanism
4204 .vb
4205      MatCreate(...,&A);
4206      MatSetType(A,MATMPIAIJ);
4207      MatSetSizes(A, m,n,M,N);
4208      MatMPIAIJSetPreallocation(A,...);
4209 .ve
4210 
4213    By default, this format uses inodes (identical nodes) when possible.
4214    We search for consecutive rows with the same nonzero structure, thereby
4215    reusing matrix information to achieve increased efficiency.
4216 
4217    Options Database Keys:
4218 +  -mat_no_inode  - Do not use inodes
4219 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4220 
4221 
4223    Example usage:
4224 
4225    Consider the following 8x8 matrix with 34 non-zero values that is
4226    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4227    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4228    as follows
4229 
4230 .vb
4231             1  2  0  |  0  3  0  |  0  4
4232     Proc0   0  5  6  |  7  0  0  |  8  0
4233             9  0 10  | 11  0  0  | 12  0
4234     -------------------------------------
4235            13  0 14  | 15 16 17  |  0  0
4236     Proc1   0 18  0  | 19 20 21  |  0  0
4237             0  0  0  | 22 23  0  | 24  0
4238     -------------------------------------
4239     Proc2  25 26 27  |  0  0 28  | 29  0
4240            30  0  0  | 31 32 33  |  0 34
4241 .ve
4242 
4243    This can be represented as a collection of submatrices as
4244 
4245 .vb
4246       A B C
4247       D E F
4248       G H I
4249 .ve
4250 
4251    Where the submatrices A,B,C are owned by proc0, D,E,F are
4252    owned by proc1, G,H,I are owned by proc2.
4253 
4254    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4255    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4256    The 'M','N' parameters are 8,8, and have the same values on all procs.
4257 
4258    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4259    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4260    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4261    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4262    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4263    matrix, and [DF] as another SeqAIJ matrix.
4264 
4265    When d_nz, o_nz parameters are specified, d_nz storage elements are
4266    allocated for every row of the local diagonal submatrix, and o_nz
4267    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4268    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4269    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4270    In this case, the values of d_nz,o_nz are
4271 .vb
4272      proc0 : d_nz = 2, o_nz = 2
4273      proc1 : d_nz = 3, o_nz = 2
4274      proc2 : d_nz = 1, o_nz = 4
4275 .ve
4276    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4277    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4278    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4279    34 values.
4280 
4281    When d_nnz, o_nnz parameters are specified, the storage is specified
4282    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4283    In the above case the values for d_nnz,o_nnz are
4284 .vb
4285      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4286      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4287      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4288 .ve
4289    Here the space allocated is the sum of all the above values, i.e. 34, and
4290    hence the pre-allocation is perfect.
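
   Using the per-row counts above, proc0 could create the matrix directly with
   the sketch below (procs 1 and 2 pass their own local sizes and count arrays
   analogously; comm is an illustrative placeholder):

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve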
4291 
4292    Level: intermediate
4293 
4294 .keywords: matrix, aij, compressed row, sparse, parallel
4295 
4296 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4297           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4298 @*/
4299 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4300 {
4301   PetscErrorCode ierr;
4302   PetscMPIInt    size;
4303 
4304   PetscFunctionBegin;
4305   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4306   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4307   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4308   if (size > 1) {
4309     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4310     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4311   } else {
4312     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4313     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4314   }
4315   PetscFunctionReturn(0);
4316 }
4317 
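/*
   MatMPIAIJGetSeqAIJ - gives access to the diagonal (Ad) and off-diagonal (Ao)
   SeqAIJ blocks of a MATMPIAIJ matrix, plus the map from Ao's compacted local
   column numbering to global columns: local column j of Ao corresponds to
   global column colmap[j].  A minimal usage sketch (variable names illustrative):

      Mat            Ad,Ao;
      const PetscInt *colmap;
      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
*/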
4318 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4319 {
4320   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4321   PetscBool      flg;
4322   PetscErrorCode ierr;
4323 
4324   PetscFunctionBegin;
4325   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4326   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4327   if (Ad)     *Ad     = a->A;
4328   if (Ao)     *Ao     = a->B;
4329   if (colmap) *colmap = a->garray;
4330   PetscFunctionReturn(0);
4331 }
4332 
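/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the sequential matrix inmat of
   each rank, in rank order, into one parallel AIJ matrix.  For MAT_INITIAL_MATRIX
   the symbolic phase computes this rank's row offset with MPI_Scan and
   preallocates the result; the numeric phase then copies the values row by row.
*/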
4333 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4334 {
4335   PetscErrorCode ierr;
4336   PetscInt       m,N,i,rstart,nnz,Ii;
4337   PetscInt       *indx;
4338   PetscScalar    *values;
4339 
4340   PetscFunctionBegin;
4341   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4342   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4343     PetscInt       *dnz,*onz,sum,bs,cbs;
4344 
4345     if (n == PETSC_DECIDE) {
4346       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4347     }
4348     /* Check sum(n) = N */
4349     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4350     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4351 
4352     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4353     rstart -= m;
4354 
4355     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4356     for (i=0; i<m; i++) {
4357       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4358       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4359       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4360     }
4361 
4362     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4363     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4364     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4365     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4366     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4367     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4368     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4369     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4370   }
4371 
4372   /* numeric phase */
4373   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4374   for (i=0; i<m; i++) {
4375     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4376     Ii   = i + rstart;
4377     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4378     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4379   }
4380   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4381   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4382   PetscFunctionReturn(0);
4383 }
4384 
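/*
   MatFileSplit - each rank copies its local rows of A, widened to the full
   global column count, into a sequential matrix and writes that matrix to the
   binary file <outfile>.<rank> (rank 2 writes to outfile.2, and so on).
*/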
4385 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4386 {
4387   PetscErrorCode    ierr;
4388   PetscMPIInt       rank;
4389   PetscInt          m,N,i,rstart,nnz;
4390   size_t            len;
4391   const PetscInt    *indx;
4392   PetscViewer       out;
4393   char              *name;
4394   Mat               B;
4395   const PetscScalar *values;
4396 
4397   PetscFunctionBegin;
4398   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4399   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4400   /* Should this be the type of the diagonal block of A? */
4401   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4402   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4403   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4404   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4405   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4406   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4407   for (i=0; i<m; i++) {
4408     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4409     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4410     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4411   }
4412   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4413   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4414 
4415   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4416   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4417   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4418   sprintf(name,"%s.%d",outfile,rank);
4419   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4420   ierr = PetscFree(name);CHKERRQ(ierr);
4421   ierr = MatView(B,out);CHKERRQ(ierr);
4422   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4423   ierr = MatDestroy(&B);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4428 {
4429   PetscErrorCode      ierr;
4430   Mat_Merge_SeqsToMPI *merge;
4431   PetscContainer      container;
4432 
4433   PetscFunctionBegin;
4434   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4435   if (container) {
4436     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4445     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4446     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4447     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4448     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4449     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4450     ierr = PetscFree(merge);CHKERRQ(ierr);
4451     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4452   }
4453   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4454   PetscFunctionReturn(0);
4455 }
4456 
4457 #include <../src/mat/utils/freespace.h>
4458 #include <petscbt.h>
4459 
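/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of MatCreateMPIAIJSumSeqAIJ():
   each rank sends the value arrays of the seqmat rows owned by other ranks
   (the pattern was exchanged by the symbolic phase), then merges its own rows
   with the received contributions and inserts the sums with MatSetValues().
*/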
4460 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4461 {
4462   PetscErrorCode      ierr;
4463   MPI_Comm            comm;
4464   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4465   PetscMPIInt         size,rank,taga,*len_s;
4466   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4467   PetscInt            proc,m;
4468   PetscInt            **buf_ri,**buf_rj;
4469   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4470   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4471   MPI_Request         *s_waits,*r_waits;
4472   MPI_Status          *status;
4473   MatScalar           *aa=a->a;
4474   MatScalar           **abuf_r,*ba_i;
4475   Mat_Merge_SeqsToMPI *merge;
4476   PetscContainer      container;
4477 
4478   PetscFunctionBegin;
4479   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4480   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4481 
4482   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4483   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4484 
4485   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4486   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4487 
4488   bi     = merge->bi;
4489   bj     = merge->bj;
4490   buf_ri = merge->buf_ri;
4491   buf_rj = merge->buf_rj;
4492 
4493   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4494   owners = merge->rowmap->range;
4495   len_s  = merge->len_s;
4496 
4497   /* send and recv matrix values */
4498   /*-----------------------------*/
4499   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4500   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4501 
4502   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4503   for (proc=0,k=0; proc<size; proc++) {
4504     if (!len_s[proc]) continue;
4505     i    = owners[proc];
4506     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4507     k++;
4508   }
4509 
4510   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4511   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4512   ierr = PetscFree(status);CHKERRQ(ierr);
4513 
4514   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4515   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4516 
4517   /* insert mat values of mpimat */
4518   /*----------------------------*/
4519   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4520   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4521 
4522   for (k=0; k<merge->nrecv; k++) {
4523     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4524     nrows       = *(buf_ri_k[k]);
4525     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4526     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4527   }
4528 
4529   /* set values of ba */
4530   m = merge->rowmap->n;
4531   for (i=0; i<m; i++) {
4532     arow = owners[rank] + i;
4533     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4534     bnzi = bi[i+1] - bi[i];
4535     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4536 
4537     /* add local non-zero vals of this proc's seqmat into ba */
4538     anzi   = ai[arow+1] - ai[arow];
4539     aj     = a->j + ai[arow];
4540     aa     = a->a + ai[arow];
4541     nextaj = 0;
4542     for (j=0; nextaj<anzi; j++) {
4543       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4544         ba_i[j] += aa[nextaj++];
4545       }
4546     }
4547 
4548     /* add received vals into ba */
4549     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4550       /* i-th row */
4551       if (i == *nextrow[k]) {
4552         anzi   = *(nextai[k]+1) - *nextai[k];
4553         aj     = buf_rj[k] + *(nextai[k]);
4554         aa     = abuf_r[k] + *(nextai[k]);
4555         nextaj = 0;
4556         for (j=0; nextaj<anzi; j++) {
4557           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4558             ba_i[j] += aa[nextaj++];
4559           }
4560         }
4561         nextrow[k]++; nextai[k]++;
4562       }
4563     }
4564     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4565   }
4566   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4567   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4568 
4569   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4570   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4571   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4572   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4573   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4574   PetscFunctionReturn(0);
4575 }
4576 
4577 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4578 {
4579   PetscErrorCode      ierr;
4580   Mat                 B_mpi;
4581   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4582   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4583   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4584   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4585   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4586   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4587   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4588   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4589   MPI_Status          *status;
4590   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4591   PetscBT             lnkbt;
4592   Mat_Merge_SeqsToMPI *merge;
4593   PetscContainer      container;
4594 
4595   PetscFunctionBegin;
4596   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4597 
4598   /* make sure it is a PETSc comm */
4599   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4601   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4602 
4603   ierr = PetscNew(&merge);CHKERRQ(ierr);
4604   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4605 
4606   /* determine row ownership */
4607   /*---------------------------------------------------------*/
4608   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4609   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4610   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4611   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4612   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4613   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4614   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4615 
4616   m      = merge->rowmap->n;
4617   owners = merge->rowmap->range;
4618 
4619   /* determine the number of messages to send, their lengths */
4620   /*---------------------------------------------------------*/
4621   len_s = merge->len_s;
4622 
4623   len          = 0; /* length of buf_si[] */
4624   merge->nsend = 0;
4625   for (proc=0; proc<size; proc++) {
4626     len_si[proc] = 0;
4627     if (proc == rank) {
4628       len_s[proc] = 0;
4629     } else {
4630       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4631       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4632     }
4633     if (len_s[proc]) {
4634       merge->nsend++;
4635       nrows = 0;
4636       for (i=owners[proc]; i<owners[proc+1]; i++) {
4637         if (ai[i+1] > ai[i]) nrows++;
4638       }
4639       len_si[proc] = 2*(nrows+1);
4640       len         += len_si[proc];
4641     }
4642   }
4643 
4644   /* determine the number and length of messages to receive for ij-structure */
4645   /*-------------------------------------------------------------------------*/
4646   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4647   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4648 
4649   /* post the Irecv of j-structure */
4650   /*-------------------------------*/
4651   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4652   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4653 
4654   /* post the Isend of j-structure */
4655   /*--------------------------------*/
4656   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4657 
4658   for (proc=0, k=0; proc<size; proc++) {
4659     if (!len_s[proc]) continue;
4660     i    = owners[proc];
4661     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4662     k++;
4663   }
4664 
4665   /* receives and sends of j-structure are complete */
4666   /*------------------------------------------------*/
4667   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4668   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4669 
4670   /* send and recv i-structure */
4671   /*---------------------------*/
4672   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4673   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4674 
4675   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4676   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4677   for (proc=0,k=0; proc<size; proc++) {
4678     if (!len_s[proc]) continue;
4679     /* form outgoing message for i-structure:
4680          buf_si[0]:                 nrows to be sent
4681                [1:nrows]:           row index (global)
4682                [nrows+1:2*nrows+1]: i-structure index
4683     */
4684     /*-------------------------------------------*/
4685     nrows       = len_si[proc]/2 - 1;
4686     buf_si_i    = buf_si + nrows+1;
4687     buf_si[0]   = nrows;
4688     buf_si_i[0] = 0;
4689     nrows       = 0;
4690     for (i=owners[proc]; i<owners[proc+1]; i++) {
4691       anzi = ai[i+1] - ai[i];
4692       if (anzi) {
4693         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4694         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4695         nrows++;
4696       }
4697     }
4698     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4699     k++;
4700     buf_si += len_si[proc];
4701   }
4702 
4703   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4704   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4705 
4706   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4707   for (i=0; i<merge->nrecv; i++) {
4708     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4709   }
4710 
4711   ierr = PetscFree(len_si);CHKERRQ(ierr);
4712   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4713   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4714   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4715   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4716   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4717   ierr = PetscFree(status);CHKERRQ(ierr);
4718 
4719   /* compute a local seq matrix in each processor */
4720   /*----------------------------------------------*/
4721   /* allocate bi array and free space for accumulating nonzero column info */
4722   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4723   bi[0] = 0;
4724 
4725   /* create and initialize a linked list */
4726   nlnk = N+1;
4727   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4728 
4729   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4730   len  = ai[owners[rank+1]] - ai[owners[rank]];
4731   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4732 
4733   current_space = free_space;
4734 
4735   /* determine symbolic info for each local row */
4736   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4737 
4738   for (k=0; k<merge->nrecv; k++) {
4739     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4740     nrows       = *buf_ri_k[k];
4741     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4742     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4743   }
4744 
4745   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4746   len  = 0;
4747   for (i=0; i<m; i++) {
4748     bnzi = 0;
4749     /* add local non-zero cols of this proc's seqmat into lnk */
4750     arow  = owners[rank] + i;
4751     anzi  = ai[arow+1] - ai[arow];
4752     aj    = a->j + ai[arow];
4753     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4754     bnzi += nlnk;
4755     /* add received col data into lnk */
4756     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4757       if (i == *nextrow[k]) { /* i-th row */
4758         anzi  = *(nextai[k]+1) - *nextai[k];
4759         aj    = buf_rj[k] + *nextai[k];
4760         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4761         bnzi += nlnk;
4762         nextrow[k]++; nextai[k]++;
4763       }
4764     }
4765     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4766 
4767     /* if free space is not available, make more free space */
4768     if (current_space->local_remaining<bnzi) {
4769       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4770       nspacedouble++;
4771     }
4772     /* copy data into free space, then initialize lnk */
4773     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4774     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4775 
4776     current_space->array           += bnzi;
4777     current_space->local_used      += bnzi;
4778     current_space->local_remaining -= bnzi;
4779 
4780     bi[i+1] = bi[i] + bnzi;
4781   }
4782 
4783   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4784 
4785   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4786   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4787   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4788 
4789   /* create symbolic parallel matrix B_mpi */
4790   /*---------------------------------------*/
4791   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4792   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4793   if (n==PETSC_DECIDE) {
4794     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4795   } else {
4796     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4797   }
4798   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4799   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4800   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4801   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4802   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4803 
4804   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4805   B_mpi->assembled    = PETSC_FALSE;
4806   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4807   merge->bi           = bi;
4808   merge->bj           = bj;
4809   merge->buf_ri       = buf_ri;
4810   merge->buf_rj       = buf_rj;
4811   merge->coi          = NULL;
4812   merge->coj          = NULL;
4813   merge->owners_co    = NULL;
4814 
4815   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4816 
4817   /* attach the supporting struct to B_mpi for reuse */
4818   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4819   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4820   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4821   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4822   *mpimat = B_mpi;
4823 
4824   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4825   PetscFunctionReturn(0);
4826 }
4827 
4828 /*@C
4829       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4830                  matrices from each processor
4831 
4832     Collective on MPI_Comm
4833 
4834    Input Parameters:
4835 +    comm - the communicator the parallel matrix will live on
4836 .    seqmat - the input sequential matrix
4837 .    m - number of local rows (or PETSC_DECIDE)
4838 .    n - number of local columns (or PETSC_DECIDE)
4839 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4840 
4841    Output Parameter:
4842 .    mpimat - the parallel matrix generated
4843 
4844     Level: advanced
4845 
4846    Notes:
4847      The dimensions of the sequential matrix in each processor MUST be the same.
4848      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
4849      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
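
   A minimal calling sketch (each rank passes its own seqmat; a later call with
   MAT_REUSE_MATRIX reuses the pattern computed by the first call):

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     ... update the numerical values of seqmat, keeping its nonzero pattern ...
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve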
4850 @*/
4851 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4852 {
4853   PetscErrorCode ierr;
4854   PetscMPIInt    size;
4855 
4856   PetscFunctionBegin;
4857   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4858   if (size == 1) {
4859     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4860     if (scall == MAT_INITIAL_MATRIX) {
4861       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4862     } else {
4863       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4864     }
4865     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4866     PetscFunctionReturn(0);
4867   }
4868   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4869   if (scall == MAT_INITIAL_MATRIX) {
4870     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4871   }
4872   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4873   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4874   PetscFunctionReturn(0);
4875 }
4876 
4877 /*@
4878      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4879           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4880           with MatGetSize().
4881 
4882     Not Collective
4883 
4884    Input Parameters:
4885 +    A - the matrix
4886 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4887 
4888    Output Parameter:
4889 .    A_loc - the local sequential matrix generated
4890 
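   A minimal usage sketch; with MAT_INITIAL_MATRIX the caller is responsible for
   destroying A_loc:

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ... use A_loc ...
     MatDestroy(&A_loc);
.ve
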
4891     Level: developer
4892 
4893 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4894 
4895 @*/
4896 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4897 {
4898   PetscErrorCode ierr;
4899   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4900   Mat_SeqAIJ     *mat,*a,*b;
4901   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4902   MatScalar      *aa,*ba,*cam;
4903   PetscScalar    *ca;
4904   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4905   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4906   PetscBool      match;
4907   MPI_Comm       comm;
4908   PetscMPIInt    size;
4909 
4910   PetscFunctionBegin;
4911   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4912   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4913   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4914   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4915   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4916 
4917   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4918   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4919   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4920   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4921   aa = a->a; ba = b->a;
4922   if (scall == MAT_INITIAL_MATRIX) {
4923     if (size == 1) {
4924       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4925       PetscFunctionReturn(0);
4926     }
4927 
4928     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4929     ci[0] = 0;
4930     for (i=0; i<am; i++) {
4931       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4932     }
4933     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4934     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4935     k    = 0;
4936     for (i=0; i<am; i++) {
4937       ncols_o = bi[i+1] - bi[i];
4938       ncols_d = ai[i+1] - ai[i];
4939       /* off-diagonal portion of A */
4940       for (jo=0; jo<ncols_o; jo++) {
4941         col = cmap[*bj];
4942         if (col >= cstart) break;
4943         cj[k]   = col; bj++;
4944         ca[k++] = *ba++;
4945       }
4946       /* diagonal portion of A */
4947       for (j=0; j<ncols_d; j++) {
4948         cj[k]   = cstart + *aj++;
4949         ca[k++] = *aa++;
4950       }
4951       /* off-diagonal portion of A */
4952       for (j=jo; j<ncols_o; j++) {
4953         cj[k]   = cmap[*bj++];
4954         ca[k++] = *ba++;
4955       }
4956     }
4957     /* put together the new matrix */
4958     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4959     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4960     /* Since these are PETSc arrays, change flags to free them as necessary. */
4961     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4962     mat->free_a  = PETSC_TRUE;
4963     mat->free_ij = PETSC_TRUE;
4964     mat->nonew   = 0;
4965   } else if (scall == MAT_REUSE_MATRIX) {
4966     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4967     ci = mat->i; cj = mat->j; cam = mat->a;
4968     for (i=0; i<am; i++) {
4969       /* off-diagonal portion of A */
4970       ncols_o = bi[i+1] - bi[i];
4971       for (jo=0; jo<ncols_o; jo++) {
4972         col = cmap[*bj];
4973         if (col >= cstart) break;
4974         *cam++ = *ba++; bj++;
4975       }
4976       /* diagonal portion of A */
4977       ncols_d = ai[i+1] - ai[i];
4978       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4979       /* off-diagonal portion of A */
4980       for (j=jo; j<ncols_o; j++) {
4981         *cam++ = *ba++; bj++;
4982       }
4983     }
4984   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4985   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4986   PetscFunctionReturn(0);
4987 }
4988 
4989 /*@C
4990      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4991 
4992     Not Collective
4993 
4994    Input Parameters:
4995 +    A - the matrix
4996 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4997 -    row, col - index sets of rows and columns to extract (or NULL)
4998 
4999    Output Parameter:
5000 .    A_loc - the local sequential matrix generated
5001 
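   A minimal usage sketch; passing NULL for row and col selects all local rows
   and the nonzero columns:

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
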
5002     Level: developer
5003 
5004 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5005 
5006 @*/
5007 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5008 {
5009   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5010   PetscErrorCode ierr;
5011   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5012   IS             isrowa,iscola;
5013   Mat            *aloc;
5014   PetscBool      match;
5015 
5016   PetscFunctionBegin;
5017   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5018   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5019   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5020   if (!row) {
5021     start = A->rmap->rstart; end = A->rmap->rend;
5022     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5023   } else {
5024     isrowa = *row;
5025   }
5026   if (!col) {
5027     start = A->cmap->rstart;
5028     cmap  = a->garray;
5029     nzA   = a->A->cmap->n;
5030     nzB   = a->B->cmap->n;
5031     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5032     ncols = 0;
5033     for (i=0; i<nzB; i++) {
5034       if (cmap[i] < start) idx[ncols++] = cmap[i];
5035       else break;
5036     }
5037     imark = i;
5038     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5039     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5040     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5041   } else {
5042     iscola = *col;
5043   }
5044   if (scall != MAT_INITIAL_MATRIX) {
5045     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5046     aloc[0] = *A_loc;
5047   }
5048   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5049   if (!col) { /* attach global id of condensed columns */
5050     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5051   }
5052   *A_loc = aloc[0];
5053   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5054   if (!row) {
5055     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5056   }
5057   if (!col) {
5058     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5059   }
5060   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5061   PetscFunctionReturn(0);
5062 }
5063 
5064 /*@C
5065     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5066 
5067     Collective on Mat
5068 
5069    Input Parameters:
5070 +    A,B - the matrices in mpiaij format
5071 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5072 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5073 
5074    Output Parameter:
5075 +    rowb, colb - index sets of rows and columns of B to extract
5076 -    B_seq - the sequential matrix generated
5077 
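   Notes:
     A minimal usage sketch (illustrative only; the first call creates rowb and colb, which are then reused):

.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... B gets new numerical values, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
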
5078     Level: developer
5079 
5080 @*/
5081 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5082 {
5083   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5084   PetscErrorCode ierr;
5085   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5086   IS             isrowb,iscolb;
5087   Mat            *bseq=NULL;
5088 
5089   PetscFunctionBegin;
5090   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5091     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5092   }
5093   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5094 
5095   if (scall == MAT_INITIAL_MATRIX) {
5096     start = A->cmap->rstart;
5097     cmap  = a->garray;
5098     nzA   = a->A->cmap->n;
5099     nzB   = a->B->cmap->n;
5100     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5101     ncols = 0;
5102     for (i=0; i<nzB; i++) {  /* row < local row index */
5103       if (cmap[i] < start) idx[ncols++] = cmap[i];
5104       else break;
5105     }
5106     imark = i;
5107     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5108     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5109     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5110     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5111   } else {
5112     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5113     isrowb  = *rowb; iscolb = *colb;
5114     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5115     bseq[0] = *B_seq;
5116   }
5117   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5118   *B_seq = bseq[0];
5119   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5120   if (!rowb) {
5121     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5122   } else {
5123     *rowb = isrowb;
5124   }
5125   if (!colb) {
5126     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5127   } else {
5128     *colb = iscolb;
5129   }
5130   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5131   PetscFunctionReturn(0);
5132 }
5133 
5134 /*
5135     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
5136     columns of the OFF-DIAGONAL portion of the local part of A
5137 
5138     Collective on Mat
5139 
5140    Input Parameters:
5141 +    A,B - the matrices in mpiaij format
5142 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5143 
5144    Output Parameters:
5145 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5146 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5147 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5148 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5149 
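   Notes:
     A usage sketch (illustrative; the optional arrays let the caller amortize the communication setup across calls):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... B gets new numerical values, same nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
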
5150     Level: developer
5151 
5152 */
5153 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5154 {
5155   VecScatter_MPI_General *gen_to,*gen_from;
5156   PetscErrorCode         ierr;
5157   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5158   Mat_SeqAIJ             *b_oth;
5159   VecScatter             ctx;
5160   MPI_Comm               comm;
5161   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5162   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5163   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5164   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5165   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5166   MPI_Request            *rwaits = NULL,*swaits = NULL;
5167   MPI_Status             *sstatus,rstatus;
5168   PetscMPIInt            jj,size;
5169   VecScatterType         type;
5170   PetscBool              mpi1;
5171 
5172   PetscFunctionBegin;
5173   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5175 
5176   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5177     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5178   }
5179   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5180   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5181 
5182   if (size == 1) {
5183     if (startsj_s) *startsj_s = NULL;
5184     if (bufa_ptr)  *bufa_ptr  = NULL;
5185     *B_oth    = NULL;
5186     PetscFunctionReturn(0);
5187   }
5188 
5189   ctx = a->Mvctx;
5190   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5191   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5192   if (!mpi1) {
5193     /* a->Mvctx is not of type MPI1, the only VecScatter type implemented for Mat-Mat ops,
5194      so create a->Mvctx_mpi1 */
5195     if (!a->Mvctx_mpi1) {
5196       a->Mvctx_mpi1_flg = PETSC_TRUE;
5197       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5198     }
5199     ctx = a->Mvctx_mpi1;
5200   }
5201   tag = ((PetscObject)ctx)->tag;
5202 
5203   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5204   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5205   nrecvs   = gen_from->n;
5206   nsends   = gen_to->n;
5207 
5208   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5209   srow    = gen_to->indices;    /* local row index to be sent */
5210   sstarts = gen_to->starts;
5211   sprocs  = gen_to->procs;
5212   sstatus = gen_to->sstatus;
5213   sbs     = gen_to->bs;
5214   rstarts = gen_from->starts;
5215   rprocs  = gen_from->procs;
5216   rbs     = gen_from->bs;
5217 
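  /* a caller that does not supply the reuse buffers cannot reuse them; force a full (initial) setup */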
5218   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5219   if (scall == MAT_INITIAL_MATRIX) {
5220     /* i-array */
5221     /*---------*/
5222     /*  post receives */
5223     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5224     for (i=0; i<nrecvs; i++) {
5225       rowlen = rvalues + rstarts[i]*rbs;
5226       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5227       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5228     }
5229 
5230     /* pack the outgoing message */
5231     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5232 
5233     sstartsj[0] = 0;
5234     rstartsj[0] = 0;
5235     len         = 0; /* total length of j or a array to be sent */
5236     k           = 0;
5237     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5238     for (i=0; i<nsends; i++) {
5239       rowlen = svalues + sstarts[i]*sbs;
5240       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5241       for (j=0; j<nrows; j++) {
5242         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5243         for (l=0; l<sbs; l++) {
5244           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5245 
5246           rowlen[j*sbs+l] = ncols;
5247 
5248           len += ncols;
5249           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5250         }
5251         k++;
5252       }
5253       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5254 
5255       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5256     }
5257     /* recvs and sends of i-array are completed */
5258     i = nrecvs;
5259     while (i--) {
5260       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5261     }
5262     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5263     ierr = PetscFree(svalues);CHKERRQ(ierr);
5264 
5265     /* allocate buffers for sending j and a arrays */
5266     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5267     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5268 
5269     /* create i-array of B_oth */
5270     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5271 
5272     b_othi[0] = 0;
5273     len       = 0; /* total length of j or a array to be received */
5274     k         = 0;
5275     for (i=0; i<nrecvs; i++) {
5276       rowlen = rvalues + rstarts[i]*rbs;
5277       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5278       for (j=0; j<nrows; j++) {
5279         b_othi[k+1] = b_othi[k] + rowlen[j];
5280         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5281         k++;
5282       }
5283       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5284     }
5285     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5286 
5287   /* allocate space for the j and a arrays of B_oth */
5288     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5289     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5290 
5291     /* j-array */
5292     /*---------*/
5293     /*  post receives of j-array */
5294     for (i=0; i<nrecvs; i++) {
5295       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5296       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5297     }
5298 
5299     /* pack the outgoing message j-array */
5300     k = 0;
5301     for (i=0; i<nsends; i++) {
5302       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5303       bufJ  = bufj+sstartsj[i];
5304       for (j=0; j<nrows; j++) {
5305         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5306         for (ll=0; ll<sbs; ll++) {
5307           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5308           for (l=0; l<ncols; l++) {
5309             *bufJ++ = cols[l];
5310           }
5311           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5312         }
5313       }
5314       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5315     }
5316 
5317     /* recvs and sends of j-array are completed */
5318     i = nrecvs;
5319     while (i--) {
5320       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5321     }
5322     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5323   } else if (scall == MAT_REUSE_MATRIX) {
5324     sstartsj = *startsj_s;
5325     rstartsj = *startsj_r;
5326     bufa     = *bufa_ptr;
5327     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5328     b_otha   = b_oth->a;
5329   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5330 
5331   /* a-array */
5332   /*---------*/
5333   /*  post receives of a-array */
5334   for (i=0; i<nrecvs; i++) {
5335     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5336     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5337   }
5338 
5339   /* pack the outgoing message a-array */
5340   k = 0;
5341   for (i=0; i<nsends; i++) {
5342     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5343     bufA  = bufa+sstartsj[i];
5344     for (j=0; j<nrows; j++) {
5345       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5346       for (ll=0; ll<sbs; ll++) {
5347         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5348         for (l=0; l<ncols; l++) {
5349           *bufA++ = vals[l];
5350         }
5351         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5352       }
5353     }
5354     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5355   }
5356   /* recvs and sends of a-array are completed */
5357   i = nrecvs;
5358   while (i--) {
5359     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5360   }
5361   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5362   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5363 
5364   if (scall == MAT_INITIAL_MATRIX) {
5365     /* put together the new matrix */
5366     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5367 
5368     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5369     /* Since these are PETSc arrays, change flags to free them as necessary. */
5370     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5371     b_oth->free_a  = PETSC_TRUE;
5372     b_oth->free_ij = PETSC_TRUE;
5373     b_oth->nonew   = 0;
5374 
5375     ierr = PetscFree(bufj);CHKERRQ(ierr);
5376     if (!startsj_s || !bufa_ptr) {
5377       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5378       ierr = PetscFree(bufa);CHKERRQ(ierr);
5379     } else {
5380       *startsj_s = sstartsj;
5381       *startsj_r = rstartsj;
5382       *bufa_ptr  = bufa;
5383     }
5384   }
5385   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5386   PetscFunctionReturn(0);
5387 }
5388 
5389 /*@C
5390   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5391 
5392   Not Collective
5393 
5394   Input Parameter:
5395 . A - The matrix in mpiaij format
5396 
5397   Output Parameters:
5398 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5399 . colmap - A map from global column index to local index into lvec
5400 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5401 
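  Notes:
  A minimal sketch (illustrative; all three output pointers must be valid):

.vb
  Vec        lvec;
  VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
.ve
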
5402   Level: developer
5403 
5404 @*/
5405 #if defined(PETSC_USE_CTABLE)
5406 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5407 #else
5408 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5409 #endif
5410 {
5411   Mat_MPIAIJ *a;
5412 
5413   PetscFunctionBegin;
5414   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5415   PetscValidPointer(lvec, 2);
5416   PetscValidPointer(colmap, 3);
5417   PetscValidPointer(multScatter, 4);
5418   a = (Mat_MPIAIJ*) A->data;
5419   if (lvec) *lvec = a->lvec;
5420   if (colmap) *colmap = a->colmap;
5421   if (multScatter) *multScatter = a->Mvctx;
5422   PetscFunctionReturn(0);
5423 }
5424 
5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5426 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5427 #if defined(PETSC_HAVE_MKL_SPARSE)
5428 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5429 #endif
5430 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5431 #if defined(PETSC_HAVE_ELEMENTAL)
5432 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5433 #endif
5434 #if defined(PETSC_HAVE_HYPRE)
5435 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5436 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5437 #endif
5438 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5439 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5440 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5441 
5442 /*
5443     Computes (B'*A')' since computing B*A directly is untenable
5444 
5445                n                       p                          p
5446         (              )       (              )         (                  )
5447       m (      A       )  *  n (       B      )   =   m (         C        )
5448         (              )       (              )         (                  )
5449 
5450 */
5451 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5452 {
5453   PetscErrorCode ierr;
5454   Mat            At,Bt,Ct;
5455 
5456   PetscFunctionBegin;
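  /* form C = (B^T A^T)^T; MAT_REUSE_MATRIX on the final transpose writes into the C created by the symbolic phase */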
5457   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5458   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5459   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5460   ierr = MatDestroy(&At);CHKERRQ(ierr);
5461   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5462   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5463   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5464   PetscFunctionReturn(0);
5465 }
5466 
5467 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5468 {
5469   PetscErrorCode ierr;
5470   PetscInt       m=A->rmap->n,n=B->cmap->n;
5471   Mat            Cmat;
5472 
5473   PetscFunctionBegin;
5474   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5475   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5476   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5477   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5478   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5479   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5480   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5481   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5482 
5483   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5484 
5485   *C = Cmat;
5486   PetscFunctionReturn(0);
5487 }
5488 
5489 /* ----------------------------------------------------------------*/
5490 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5491 {
5492   PetscErrorCode ierr;
5493 
5494   PetscFunctionBegin;
5495   if (scall == MAT_INITIAL_MATRIX) {
5496     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5497     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5498     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5499   }
5500   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5501   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5502   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5503   PetscFunctionReturn(0);
5504 }
5505 
5506 /*MC
5507    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5508 
5509    Options Database Keys:
5510 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5511 
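   Example (a minimal sketch; A is an existing Mat on a communicator with more than one process):
.vb
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
.ve
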
5512   Level: beginner
5513 
5514 .seealso: MatCreateAIJ()
5515 M*/
5516 
5517 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5518 {
5519   Mat_MPIAIJ     *b;
5520   PetscErrorCode ierr;
5521   PetscMPIInt    size;
5522 
5523   PetscFunctionBegin;
5524   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5525 
5526   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5527   B->data       = (void*)b;
5528   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5529   B->assembled  = PETSC_FALSE;
5530   B->insertmode = NOT_SET_VALUES;
5531   b->size       = size;
5532 
5533   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5534 
5535   /* build cache for off-process entries formed during MatSetValues() */
5536   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5537 
5538   b->donotstash  = PETSC_FALSE;
5539   b->colmap      = 0;
5540   b->garray      = 0;
5541   b->roworiented = PETSC_TRUE;
5542 
5543   /* stuff used for matrix vector multiply */
5544   b->lvec  = NULL;
5545   b->Mvctx = NULL;
5546 
5547   /* stuff for MatGetRow() */
5548   b->rowindices   = 0;
5549   b->rowvalues    = 0;
5550   b->getrowactive = PETSC_FALSE;
5551 
5552   /* flexible pointer used in CUSP/CUSPARSE classes */
5553   b->spptr = NULL;
5554 
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5559   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5561   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5563   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5564 #if defined(PETSC_HAVE_MKL_SPARSE)
5565   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5566 #endif
5567   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5568   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5569 #if defined(PETSC_HAVE_ELEMENTAL)
5570   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5571 #endif
5572 #if defined(PETSC_HAVE_HYPRE)
5573   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5574 #endif
5575   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5576   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5577   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5578   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5579   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5580 #if defined(PETSC_HAVE_HYPRE)
5581   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5582 #endif
5583   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5584   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5585   PetscFunctionReturn(0);
5586 }
5587 
5588 /*@C
5589      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5590          and "off-diagonal" part of the matrix in CSR format.
5591 
5592    Collective on MPI_Comm
5593 
5594    Input Parameters:
5595 +  comm - MPI communicator
5596 .  m - number of local rows (Cannot be PETSC_DECIDE)
5597 .  n - This value should be the same as the local size used in creating the
5598        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5599        calculated if N is given). For square matrices n is almost always m.
5600 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5601 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5602 .  i - row indices for "diagonal" portion of matrix
5603 .  j - column indices
5604 .  a - matrix values
5605 .  oi - row indices for "off-diagonal" portion of matrix
5606 .  oj - column indices
5607 -  oa - matrix values
5608 
5609    Output Parameter:
5610 .   mat - the matrix
5611 
5612    Level: advanced
5613 
5614    Notes:
5615        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5616        must free the arrays once the matrix has been destroyed and not before.
5617 
5618        The i and j indices are 0 based
5619 
5620        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5621 
5622        This sets local rows and cannot be used to set off-processor values.
5623 
5624        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5625        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5626        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5627        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5628        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5629        communication if it is known that only local entries will be set.
5630 
5631 .keywords: matrix, aij, compressed row, sparse, parallel
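   Example:
       A hedged sketch for the rank owning rows 0 and 1 of a 4x4 matrix distributed over two processes, with
       diagonal-block entries (0,0)=1.0 and (1,1)=2.0 and one off-diagonal entry (0,3)=5.0; it assumes j holds
       LOCAL column indices of the "diagonal" block while oj holds GLOBAL column indices:

.vb
       PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
       PetscScalar a[]  = {1.0,2.0};
       PetscInt    oi[] = {0,1,1},  oj[] = {3};
       PetscScalar oa[] = {5.0};
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
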
5632 
5633 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5634           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5635 @*/
5636 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5637 {
5638   PetscErrorCode ierr;
5639   Mat_MPIAIJ     *maij;
5640 
5641   PetscFunctionBegin;
5642   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5643   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5644   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5645   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5646   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5647   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5648   maij = (Mat_MPIAIJ*) (*mat)->data;
5649 
5650   (*mat)->preallocated = PETSC_TRUE;
5651 
5652   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5653   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5654 
5655   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5656   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5657 
5658   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5659   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5660   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5661   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5662 
5663   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5664   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5665   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5666   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5667   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5668   PetscFunctionReturn(0);
5669 }
5670 
5671 /*
5672     Special version for direct calls from Fortran
5673 */
5674 #include <petsc/private/fortranimpl.h>
5675 
5676 /* Change these macros so they can be used in a void function */
5677 #undef CHKERRQ
5678 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5679 #undef SETERRQ2
5680 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5681 #undef SETERRQ3
5682 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5683 #undef SETERRQ
5684 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5685 
5686 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5687 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5688 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5689 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5690 #else
5691 #endif
5692 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5693 {
5694   Mat            mat  = *mmat;
5695   PetscInt       m    = *mm, n = *mn;
5696   InsertMode     addv = *maddv;
5697   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5698   PetscScalar    value;
5699   PetscErrorCode ierr;
5700 
5701   MatCheckPreallocated(mat,1);
5702   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5703 
5704 #if defined(PETSC_USE_DEBUG)
5705   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5706 #endif
5707   {
5708     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5709     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5710     PetscBool roworiented = aij->roworiented;
5711 
5712     /* Some Variables required in the macro */
5713     Mat        A                 = aij->A;
5714     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5715     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5716     MatScalar  *aa               = a->a;
5717     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5718     Mat        B                 = aij->B;
5719     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5720     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5721     MatScalar  *ba               = b->a;
5722 
5723     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5724     PetscInt  nonew = a->nonew;
5725     MatScalar *ap1,*ap2;
5726 
5727     PetscFunctionBegin;
5728     for (i=0; i<m; i++) {
5729       if (im[i] < 0) continue;
5730 #if defined(PETSC_USE_DEBUG)
5731       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5732 #endif
5733       if (im[i] >= rstart && im[i] < rend) {
5734         row      = im[i] - rstart;
5735         lastcol1 = -1;
5736         rp1      = aj + ai[row];
5737         ap1      = aa + ai[row];
5738         rmax1    = aimax[row];
5739         nrow1    = ailen[row];
5740         low1     = 0;
5741         high1    = nrow1;
5742         lastcol2 = -1;
5743         rp2      = bj + bi[row];
5744         ap2      = ba + bi[row];
5745         rmax2    = bimax[row];
5746         nrow2    = bilen[row];
5747         low2     = 0;
5748         high2    = nrow2;
5749 
5750         for (j=0; j<n; j++) {
5751           if (roworiented) value = v[i*n+j];
5752           else value = v[i+j*m];
5753           if (in[j] >= cstart && in[j] < cend) {
5754             col = in[j] - cstart;
5755             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5756             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5757           } else if (in[j] < 0) continue;
5758 #if defined(PETSC_USE_DEBUG)
5759           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5760           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5761 #endif
5762           else {
5763             if (mat->was_assembled) {
5764               if (!aij->colmap) {
5765                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5766               }
5767 #if defined(PETSC_USE_CTABLE)
5768               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5769               col--;
5770 #else
5771               col = aij->colmap[in[j]] - 1;
5772 #endif
5773               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5774               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5775                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5776                 col  =  in[j];
5777                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5778                 B     = aij->B;
5779                 b     = (Mat_SeqAIJ*)B->data;
5780                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5781                 rp2   = bj + bi[row];
5782                 ap2   = ba + bi[row];
5783                 rmax2 = bimax[row];
5784                 nrow2 = bilen[row];
5785                 low2  = 0;
5786                 high2 = nrow2;
5787                 bm    = aij->B->rmap->n;
5788                 ba    = b->a;
5789               }
5790             } else col = in[j];
5791             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5792           }
5793         }
5794       } else if (!aij->donotstash) {
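        /* the row belongs to another process: stash the values to be communicated during assembly */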
5795         if (roworiented) {
5796           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5797         } else {
5798           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5799         }
5800       }
5801     }
5802   }
5803   PetscFunctionReturnVoid();
5804 }
5805 
5806