xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c87ba875e4007ad659b117ea274f03d5f4cd5ea7)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
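
/*
   A minimal usage sketch illustrating the recommendation above: create a MATAIJ matrix and call
   both preallocation routines so the same code runs on one or many MPI processes.  The sizes and
   the per-row counts d_nnz/o_nnz below are hypothetical application data, not defined in this file.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       m,n,M,N,*d_nnz,*o_nnz;    // hypothetical sizes and per-row nonzero counts filled by the application
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);                            // or -mat_type aij with MatSetFromOptions()
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);            // takes effect on a single-process communicator
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);    // takes effect on multi-process communicators

   Calling both is safe; the call that does not apply to the actual type has no effect.
*/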
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
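
/*
   A short sketch of selecting this type through the options database rather than in code; A, M, N,
   d_nnz, and o_nnz are the same hypothetical names as in the sketch above, and the program would be
   run with -mat_type aijcrl.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                            // picks up -mat_type aijcrl
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/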
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
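
/*
   A hedged usage sketch for the routine above (the names gseq, m_local, and rank are hypothetical).
   As the code suggests, only rank 0 has to supply the sequential matrix; every rank passes the number
   of rows it wants to own, and with MAT_REUSE_MATRIX only new numerical values are shipped from rank 0.

     Mat gseq = NULL,Adist;
     if (!rank) { ... assemble the square MATSEQAIJ matrix gseq on rank 0 ... }
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m_local,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
     ... later, with the same nonzero pattern but new values in gseq on rank 0 ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m_local,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/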
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each processor
425 stores an integer array of order N) but access is fast.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
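
/*
   For reference, a small sketch of how the colmap created above is consulted (it mirrors the lookups
   in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below); gcol is a hypothetical global column
   index and col becomes the local index into the off-diagonal block, or negative if the column is absent.

     PetscInt gcol,col;                        // gcol: hypothetical global column index
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;                                    // the table stores indices shifted by one; 0 means "not found"
   #else
     col = aij->colmap[gcol] - 1;              // the dense array stores indices shifted by one as well
   #endif
*/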
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether LogFlops will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
584 #endif
585     if (im[i] >= rstart && im[i] < rend) {
586       row      = im[i] - rstart;
587       lastcol1 = -1;
588       rp1      = aj + ai[row];
589       ap1      = aa + ai[row];
590       rmax1    = aimax[row];
591       nrow1    = ailen[row];
592       low1     = 0;
593       high1    = nrow1;
594       lastcol2 = -1;
595       rp2      = bj + bi[row];
596       ap2      = ba + bi[row];
597       rmax2    = bimax[row];
598       nrow2    = bilen[row];
599       low2     = 0;
600       high2    = nrow2;
601 
602       for (j=0; j<n; j++) {
603         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
604         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
605         if (in[j] >= cstart && in[j] < cend) {
606           col   = in[j] - cstart;
607           nonew = a->nonew;
608           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
610           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
611 #endif
612         } else if (in[j] < 0) continue;
613 #if defined(PETSC_USE_DEBUG)
614         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
615 #endif
616         else {
617           if (mat->was_assembled) {
618             if (!aij->colmap) {
619               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
620             }
621 #if defined(PETSC_USE_CTABLE)
622             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
623             col--;
624 #else
625             col = aij->colmap[in[j]] - 1;
626 #endif
627             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
628               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
629               col  =  in[j];
630               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
631               B        = aij->B;
632               b        = (Mat_SeqAIJ*)B->data;
633               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
634               rp2      = bj + bi[row];
635               ap2      = ba + bi[row];
636               rmax2    = bimax[row];
637               nrow2    = bilen[row];
638               low2     = 0;
639               high2    = nrow2;
640               bm       = aij->B->rmap->n;
641               ba       = b->a;
642               inserted = PETSC_FALSE;
643             } else if (col < 0) {
644               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
645                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
646               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
647             }
648           } else col = in[j];
649           nonew = b->nonew;
650           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
652           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
653 #endif
654         }
655       }
656     } else {
657       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
658       if (!aij->donotstash) {
659         mat->assembled = PETSC_FALSE;
660         if (roworiented) {
661           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
662         } else {
663           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
664         }
665       }
666     }
667   }
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
673     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
674     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
675 */
676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
677 {
678   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
679   Mat            A           = aij->A; /* diagonal part of the matrix */
680   Mat            B           = aij->B; /* offdiagonal part of the matrix */
681   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
682   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
683   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
684   PetscInt       *ailen      = a->ilen,*aj = a->j;
685   PetscInt       *bilen      = b->ilen,*bj = b->j;
686   PetscInt       am          = aij->A->rmap->n,j;
687   PetscInt       diag_so_far = 0,dnz;
688   PetscInt       offd_so_far = 0,onz;
689 
690   PetscFunctionBegin;
691   /* Iterate over all rows of the matrix */
692   for (j=0; j<am; j++) {
693     dnz = onz = 0;
694     /*  Iterate over all non-zero columns of the current row */
695     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
696       /* If column is in the diagonal */
697       if (mat_j[col] >= cstart && mat_j[col] < cend) {
698         aj[diag_so_far++] = mat_j[col] - cstart;
699         dnz++;
700       } else { /* off-diagonal entries */
701         bj[offd_so_far++] = mat_j[col];
702         onz++;
703       }
704     }
705     ailen[j] = dnz;
706     bilen[j] = onz;
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 /*
712     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
713     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
714     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
715     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
716     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
717 */
718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
719 {
720   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
721   Mat            A      = aij->A; /* diagonal part of the matrix */
722   Mat            B      = aij->B; /* offdiagonal part of the matrix */
723   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
724   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
725   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
726   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
727   PetscInt       *ailen = a->ilen,*aj = a->j;
728   PetscInt       *bilen = b->ilen,*bj = b->j;
729   PetscInt       am     = aij->A->rmap->n,j;
730   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
731   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
732   PetscScalar    *aa = a->a,*ba = b->a;
733 
734   PetscFunctionBegin;
735   /* Iterate over all rows of the matrix */
736   for (j=0; j<am; j++) {
737     dnz_row = onz_row = 0;
738     rowstart_offd = full_offd_i[j];
739     rowstart_diag = full_diag_i[j];
740     /*  Iterate over all non-zero columns of the current row */
741     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
742       /* If column is in the diagonal */
743       if (mat_j[col] >= cstart && mat_j[col] < cend) {
744         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
745         aa[rowstart_diag+dnz_row] = mat_a[col];
746         dnz_row++;
747       } else { /* off-diagonal entries */
748         bj[rowstart_offd+onz_row] = mat_j[col];
749         ba[rowstart_offd+onz_row] = mat_a[col];
750         onz_row++;
751       }
752     }
753     ailen[j] = dnz_row;
754     bilen[j] = onz_row;
755   }
756   PetscFunctionReturn(0);
757 }
758 
759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
760 {
761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
762   PetscErrorCode ierr;
763   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
764   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
765 
766   PetscFunctionBegin;
767   for (i=0; i<m; i++) {
768     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
769     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
770     if (idxm[i] >= rstart && idxm[i] < rend) {
771       row = idxm[i] - rstart;
772       for (j=0; j<n; j++) {
773         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
774         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
775         if (idxn[j] >= cstart && idxn[j] < cend) {
776           col  = idxn[j] - cstart;
777           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
778         } else {
779           if (!aij->colmap) {
780             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
781           }
782 #if defined(PETSC_USE_CTABLE)
783           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
784           col--;
785 #else
786           col = aij->colmap[idxn[j]] - 1;
787 #endif
788           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
789           else {
790             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
791           }
792         }
793       }
794     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
800 
801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   PetscErrorCode ierr;
805   PetscInt       nstash,reallocs;
806 
807   PetscFunctionBegin;
808   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
812   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
817 {
818   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
819   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
820   PetscErrorCode ierr;
821   PetscMPIInt    n;
822   PetscInt       i,j,rstart,ncols,flg;
823   PetscInt       *row,*col;
824   PetscBool      other_disassembled;
825   PetscScalar    *val;
826 
827   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
828 
829   PetscFunctionBegin;
830   if (!aij->donotstash && !mat->nooffprocentries) {
831     while (1) {
832       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
833       if (!flg) break;
834 
835       for (i=0; i<n; ) {
836         /* Now identify the consecutive vals belonging to the same row */
837         for (j=i,rstart=row[j]; j<n; j++) {
838           if (row[j] != rstart) break;
839         }
840         if (j < n) ncols = j-i;
841         else       ncols = n-i;
842         /* Now assemble all these values with a single function call */
843         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
844         i    = j;
845       }
846     }
847     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
848   }
849 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
850   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
851   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
852   if (mat->boundtocpu) {
853     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
854     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
855   }
856 #endif
857   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
858   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
859 
860   /* determine if any processor has disassembled; if so, we must
861      also disassemble ourselves, in order that we may reassemble. */
862   /*
863      if nonzero structure of submatrix B cannot change then we know that
864      no processor disassembled thus we can skip this stuff
865   */
866   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
867     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
868     if (mat->was_assembled && !other_disassembled) {
869 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
870       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
871 #endif
872       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
873     }
874   }
875   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
876     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
877   }
878   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
879 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
880   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
881 #endif
882   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
883   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
884 
885   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
886 
887   aij->rowvalues = 0;
888 
889   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
890   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
891 
892   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
893   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
894     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
895     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
896   }
897 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
898   mat->offloadmask = PETSC_OFFLOAD_BOTH;
899 #endif
900   PetscFunctionReturn(0);
901 }
902 
903 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
904 {
905   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
906   PetscErrorCode ierr;
907 
908   PetscFunctionBegin;
909   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
910   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
911   PetscFunctionReturn(0);
912 }
913 
914 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
915 {
916   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
917   PetscObjectState sA, sB;
918   PetscInt        *lrows;
919   PetscInt         r, len;
920   PetscBool        cong, lch, gch;
921   PetscErrorCode   ierr;
922 
923   PetscFunctionBegin;
924   /* get locally owned rows */
925   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
926   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
927   /* fix right hand side if needed */
928   if (x && b) {
929     const PetscScalar *xx;
930     PetscScalar       *bb;
931 
932     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
933     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
934     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
935     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
936     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
937     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
938   }
939 
940   sA = mat->A->nonzerostate;
941   sB = mat->B->nonzerostate;
942 
943   if (diag != 0.0 && cong) {
944     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
945     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
946   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
947     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
948     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
949     PetscInt   nnwA, nnwB;
950     PetscBool  nnzA, nnzB;
951 
952     nnwA = aijA->nonew;
953     nnwB = aijB->nonew;
954     nnzA = aijA->keepnonzeropattern;
955     nnzB = aijB->keepnonzeropattern;
956     if (!nnzA) {
957       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
958       aijA->nonew = 0;
959     }
960     if (!nnzB) {
961       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
962       aijB->nonew = 0;
963     }
964     /* Must zero here before the next loop */
965     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
966     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
967     for (r = 0; r < len; ++r) {
968       const PetscInt row = lrows[r] + A->rmap->rstart;
969       if (row >= A->cmap->N) continue;
970       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
971     }
972     aijA->nonew = nnwA;
973     aijB->nonew = nnwB;
974   } else {
975     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
976     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(lrows);CHKERRQ(ierr);
979   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
980   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981 
982   /* reduce nonzerostate */
983   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
984   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
985   if (gch) A->nonzerostate++;
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
990 {
991   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
992   PetscErrorCode    ierr;
993   PetscMPIInt       n = A->rmap->n;
994   PetscInt          i,j,r,m,len = 0;
995   PetscInt          *lrows,*owners = A->rmap->range;
996   PetscMPIInt       p = 0;
997   PetscSFNode       *rrows;
998   PetscSF           sf;
999   const PetscScalar *xx;
1000   PetscScalar       *bb,*mask;
1001   Vec               xmask,lmask;
1002   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
1003   const PetscInt    *aj, *ii,*ridx;
1004   PetscScalar       *aa;
1005 
1006   PetscFunctionBegin;
1007   /* Create SF where leaves are input rows and roots are owned rows */
1008   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1009   for (r = 0; r < n; ++r) lrows[r] = -1;
1010   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1011   for (r = 0; r < N; ++r) {
1012     const PetscInt idx   = rows[r];
1013     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1014     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1015       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1016     }
1017     rrows[r].rank  = p;
1018     rrows[r].index = rows[r] - owners[p];
1019   }
1020   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1021   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1022   /* Collect flags for rows to be zeroed */
1023   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1024   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1025   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1026   /* Compress and put in row numbers */
1027   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1028   /* zero diagonal part of matrix */
1029   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1030   /* handle off diagonal part of matrix */
1031   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1032   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1033   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1034   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1035   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1036   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1039   if (x && b) { /* this code is buggy when the row and column layout don't match */
1040     PetscBool cong;
1041 
1042     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1043     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1044     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1045     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1047     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1048   }
1049   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1050   /* remove zeroed rows of off diagonal matrix */
1051   ii = aij->i;
1052   for (i=0; i<len; i++) {
1053     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1054   }
1055   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1056   if (aij->compressedrow.use) {
1057     m    = aij->compressedrow.nrows;
1058     ii   = aij->compressedrow.i;
1059     ridx = aij->compressedrow.rindex;
1060     for (i=0; i<m; i++) {
1061       n  = ii[i+1] - ii[i];
1062       aj = aij->j + ii[i];
1063       aa = aij->a + ii[i];
1064 
1065       for (j=0; j<n; j++) {
1066         if (PetscAbsScalar(mask[*aj])) {
1067           if (b) bb[*ridx] -= *aa*xx[*aj];
1068           *aa = 0.0;
1069         }
1070         aa++;
1071         aj++;
1072       }
1073       ridx++;
1074     }
1075   } else { /* do not use compressed row format */
1076     m = l->B->rmap->n;
1077     for (i=0; i<m; i++) {
1078       n  = ii[i+1] - ii[i];
1079       aj = aij->j + ii[i];
1080       aa = aij->a + ii[i];
1081       for (j=0; j<n; j++) {
1082         if (PetscAbsScalar(mask[*aj])) {
1083           if (b) bb[i] -= *aa*xx[*aj];
1084           *aa = 0.0;
1085         }
1086         aa++;
1087         aj++;
1088       }
1089     }
1090   }
1091   if (x && b) {
1092     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1093     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1094   }
1095   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1096   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1097   ierr = PetscFree(lrows);CHKERRQ(ierr);
1098 
1099   /* only change matrix nonzero state if pattern was allowed to be changed */
1100   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1101     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1102     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       nt;
1112   VecScatter     Mvctx = a->Mvctx;
1113 
1114   PetscFunctionBegin;
1115   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1116   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1117 
1118   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1120   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1121   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1136 {
1137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1138   PetscErrorCode ierr;
1139   VecScatter     Mvctx = a->Mvctx;
1140 
1141   PetscFunctionBegin;
1142   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1143   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1145   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1146   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1151 {
1152   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156   /* do nondiagonal part */
1157   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1158   /* do local part */
1159   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1160   /* add partial results together */
1161   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1162   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1167 {
1168   MPI_Comm       comm;
1169   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1170   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1171   IS             Me,Notme;
1172   PetscErrorCode ierr;
1173   PetscInt       M,N,first,last,*notme,i;
1174   PetscBool      lf;
1175   PetscMPIInt    size;
1176 
1177   PetscFunctionBegin;
1178   /* Easy test: symmetric diagonal block */
1179   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1180   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1181   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1182   if (!*f) PetscFunctionReturn(0);
1183   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1185   if (size == 1) PetscFunctionReturn(0);
1186 
1187   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1188   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1189   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1190   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1191   for (i=0; i<first; i++) notme[i] = i;
1192   for (i=last; i<M; i++) notme[i-last+first] = i;
1193   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1194   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1195   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1196   Aoff = Aoffs[0];
1197   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1198   Boff = Boffs[0];
1199   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1200   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1201   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1202   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1203   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1204   ierr = PetscFree(notme);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1209 {
1210   PetscErrorCode ierr;
1211 
1212   PetscFunctionBegin;
1213   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1218 {
1219   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1220   PetscErrorCode ierr;
1221 
1222   PetscFunctionBegin;
1223   /* do nondiagonal part */
1224   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1225   /* do local part */
1226   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1227   /* add partial results together */
1228   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1229   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 /*
1234   This only works correctly for square matrices where the subblock A->A is the
1235    diagonal block
1236 */
1237 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1238 {
1239   PetscErrorCode ierr;
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241 
1242   PetscFunctionBegin;
1243   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1244   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1245   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1250 {
1251   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1256   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1261 {
1262   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266 #if defined(PETSC_USE_LOG)
1267   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1268 #endif
1269   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1271   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1272   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1273 #if defined(PETSC_USE_CTABLE)
1274   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1275 #else
1276   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1277 #endif
1278   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1279   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1280   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1281   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1282   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1283   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1284   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1285 
1286   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1296 #if defined(PETSC_HAVE_ELEMENTAL)
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1298 #endif
1299 #if defined(PETSC_HAVE_HYPRE)
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1302 #endif
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
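/*
   Sketch of the binary stream written below: a 4-entry header
   [MAT_FILE_CLASSID, M, N, global nonzero count], then the per-row nonzero counts in
   global row order, then all global column indices, then all values; the distributed
   pieces are gathered in process order by PetscViewerBinaryWriteAll().
*/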
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1314   const PetscInt    *garray = aij->garray;
1315   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1316   PetscInt          *rowlens;
1317   PetscInt          *colidxs;
1318   PetscScalar       *matvals;
1319   PetscErrorCode    ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1323 
1324   M  = mat->rmap->N;
1325   N  = mat->cmap->N;
1326   m  = mat->rmap->n;
1327   rs = mat->rmap->rstart;
1328   cs = mat->cmap->rstart;
1329   nz = A->nz + B->nz;
1330 
1331   /* write matrix header */
1332   header[0] = MAT_FILE_CLASSID;
1333   header[1] = M; header[2] = N; header[3] = nz;
1334   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1335   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1336 
1337   /* fill in and store row lengths */
1338   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1339   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1340   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1341   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1342 
1343   /* fill in and store column indices */
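  /* per row: first the off-diagonal entries whose global column lies below the
     diagonal block, then the diagonal block entries shifted to global numbering,
     then the remaining off-diagonal entries, so each row comes out sorted by
     global column */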
1344   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1345   for (cnt=0, i=0; i<m; i++) {
1346     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1347       if (garray[B->j[jb]] > cs) break;
1348       colidxs[cnt++] = garray[B->j[jb]];
1349     }
1350     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1351       colidxs[cnt++] = A->j[ja] + cs;
1352     for (; jb<B->i[i+1]; jb++)
1353       colidxs[cnt++] = garray[B->j[jb]];
1354   }
1355   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1356   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1357   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1358 
1359   /* fill in and store nonzero values */
1360   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1361   for (cnt=0, i=0; i<m; i++) {
1362     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1363       if (garray[B->j[jb]] > cs) break;
1364       matvals[cnt++] = B->a[jb];
1365     }
1366     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1367       matvals[cnt++] = A->a[ja];
1368     for (; jb<B->i[i+1]; jb++)
1369       matvals[cnt++] = B->a[jb];
1370   }
1371   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1372   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1373   ierr = PetscFree(matvals);CHKERRQ(ierr);
1374 
1375   /* write block size option to the viewer's .info file */
1376   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #include <petscdraw.h>
1381 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1382 {
1383   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1384   PetscErrorCode    ierr;
1385   PetscMPIInt       rank = aij->rank,size = aij->size;
1386   PetscBool         isdraw,iascii,isbinary;
1387   PetscViewer       sviewer;
1388   PetscViewerFormat format;
1389 
1390   PetscFunctionBegin;
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1394   if (iascii) {
1395     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1396     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1397       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1398       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1399       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1400       for (i=0; i<(PetscInt)size; i++) {
1401         nmax = PetscMax(nmax,nz[i]);
1402         nmin = PetscMin(nmin,nz[i]);
1403         navg += nz[i];
1404       }
1405       ierr = PetscFree(nz);CHKERRQ(ierr);
1406       navg = navg/size;
1407       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1408       PetscFunctionReturn(0);
1409     }
1410     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1411     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1412       MatInfo   info;
1413       PetscBool inodes;
1414 
1415       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1416       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1418       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1419       if (!inodes) {
1420         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1421                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1422       } else {
1423         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1424                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1425       }
1426       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1427       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1428       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1429       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1430       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1431       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1433       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1434       PetscFunctionReturn(0);
1435     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1436       PetscInt inodecount,inodelimit,*inodes;
1437       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1438       if (inodes) {
1439         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1440       } else {
1441         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1442       }
1443       PetscFunctionReturn(0);
1444     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1445       PetscFunctionReturn(0);
1446     }
1447   } else if (isbinary) {
1448     if (size == 1) {
1449       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1450       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1451     } else {
1452       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1453     }
1454     PetscFunctionReturn(0);
1455   } else if (iascii && size == 1) {
1456     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1457     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1458     PetscFunctionReturn(0);
1459   } else if (isdraw) {
1460     PetscDraw draw;
1461     PetscBool isnull;
1462     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1463     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1464     if (isnull) PetscFunctionReturn(0);
1465   }
1466 
1467   { /* assemble the entire matrix onto first processor */
1468     Mat A = NULL, Av;
1469     IS  isrow,iscol;
1470 
1471     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1473     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1474     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1475 /*  The commented code uses MatCreateSubMatrices instead */
1476 /*
1477     Mat *AA, A = NULL, Av;
1478     IS  isrow,iscol;
1479 
1480     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1482     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1483     if (!rank) {
1484        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1485        A    = AA[0];
1486        Av   = AA[0];
1487     }
1488     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1489 */
1490     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1491     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1492     /*
1493        Every process has to participate in the call that draws the matrix since the
1494        graphics waits are synchronized across all processes that share the PetscDraw object
1495     */
1496     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1497     if (!rank) {
1498       if (((PetscObject)mat)->name) {
1499         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1500       }
1501       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1502     }
1503     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1504     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1505     ierr = MatDestroy(&A);CHKERRQ(ierr);
1506   }
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1511 {
1512   PetscErrorCode ierr;
1513   PetscBool      iascii,isdraw,issocket,isbinary;
1514 
1515   PetscFunctionBegin;
1516   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1520   if (iascii || isdraw || isbinary || issocket) {
1521     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1527 {
1528   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1529   PetscErrorCode ierr;
1530   Vec            bb1 = 0;
1531   PetscBool      hasop;
1532 
1533   PetscFunctionBegin;
1534   if (flag == SOR_APPLY_UPPER) {
1535     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1536     PetscFunctionReturn(0);
1537   }
1538 
1539   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1540     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1541   }
1542 
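  /*
     Sketch of the parallel iteration used below (a processor-local block Jacobi outer
     iteration): each pass forms bb1 = bb - B*x_ghost from the current ghosted solution
     and then applies the requested local SOR sweep with the diagonal block A to update xx.
  */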
1543   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1544     if (flag & SOR_ZERO_INITIAL_GUESS) {
1545       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1546       its--;
1547     }
1548 
1549     while (its--) {
1550       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552 
1553       /* update rhs: bb1 = bb - B*x */
1554       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1555       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1556 
1557       /* local sweep */
1558       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1559     }
1560   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1561     if (flag & SOR_ZERO_INITIAL_GUESS) {
1562       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1563       its--;
1564     }
1565     while (its--) {
1566       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1567       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568 
1569       /* update rhs: bb1 = bb - B*x */
1570       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1571       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1572 
1573       /* local sweep */
1574       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1575     }
1576   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1577     if (flag & SOR_ZERO_INITIAL_GUESS) {
1578       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1579       its--;
1580     }
1581     while (its--) {
1582       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584 
1585       /* update rhs: bb1 = bb - B*x */
1586       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1587       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1588 
1589       /* local sweep */
1590       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1591     }
1592   } else if (flag & SOR_EISENSTAT) {
1593     Vec xx1;
1594 
1595     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1596     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1597 
1598     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1599     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     if (!mat->diag) {
1601       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1602       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1603     }
1604     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1605     if (hasop) {
1606       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1607     } else {
1608       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1609     }
1610     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1611 
1612     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1613 
1614     /* local sweep */
1615     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1616     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1617     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1618   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1619 
1620   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1621 
1622   matin->factorerrortype = mat->A->factorerrortype;
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1627 {
1628   Mat            aA,aB,Aperm;
1629   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1630   PetscScalar    *aa,*ba;
1631   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1632   PetscSF        rowsf,sf;
1633   IS             parcolp = NULL;
1634   PetscBool      done;
1635   PetscErrorCode ierr;
1636 
1637   PetscFunctionBegin;
1638   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1639   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1640   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1641   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1642 
1643   /* Invert row permutation to find out where my rows should go */
1644   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1645   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1646   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1647   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1648   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1649   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650 
1651   /* Invert column permutation to find out where my columns should go */
1652   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1653   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1654   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1655   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1656   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1657   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1659 
1660   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1661   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1662   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1663 
1664   /* Find out where my gcols should go */
1665   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1666   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1667   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1668   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1669   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1670   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1671   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1673 
1674   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1675   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1677   for (i=0; i<m; i++) {
1678     PetscInt    row = rdest[i];
1679     PetscMPIInt rowner;
1680     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1681     for (j=ai[i]; j<ai[i+1]; j++) {
1682       PetscInt    col = cdest[aj[j]];
1683       PetscMPIInt cowner;
1684       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1685       if (rowner == cowner) dnnz[i]++;
1686       else onnz[i]++;
1687     }
1688     for (j=bi[i]; j<bi[i+1]; j++) {
1689       PetscInt    col = gcdest[bj[j]];
1690       PetscMPIInt cowner;
1691       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1692       if (rowner == cowner) dnnz[i]++;
1693       else onnz[i]++;
1694     }
1695   }
1696   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1697   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1701 
1702   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1703   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1705   for (i=0; i<m; i++) {
1706     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1707     PetscInt j0,rowlen;
1708     rowlen = ai[i+1] - ai[i];
1709     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the length of the repurposed work arrays), so insert in batches */
1710       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1711       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1712     }
1713     rowlen = bi[i+1] - bi[i];
1714     for (j0=j=0; j<rowlen; j0=j) {
1715       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1716       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1717     }
1718   }
1719   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1723   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1725   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1726   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1727   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1728   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1729   *B = Aperm;
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1734 {
1735   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1736   PetscErrorCode ierr;
1737 
1738   PetscFunctionBegin;
1739   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1740   if (ghosts) *ghosts = aij->garray;
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1745 {
1746   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1747   Mat            A    = mat->A,B = mat->B;
1748   PetscErrorCode ierr;
1749   PetscLogDouble isend[5],irecv[5];
1750 
1751   PetscFunctionBegin;
1752   info->block_size = 1.0;
1753   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1754 
1755   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1756   isend[3] = info->memory;  isend[4] = info->mallocs;
1757 
1758   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1759 
1760   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1761   isend[3] += info->memory;  isend[4] += info->mallocs;
1762   if (flag == MAT_LOCAL) {
1763     info->nz_used      = isend[0];
1764     info->nz_allocated = isend[1];
1765     info->nz_unneeded  = isend[2];
1766     info->memory       = isend[3];
1767     info->mallocs      = isend[4];
1768   } else if (flag == MAT_GLOBAL_MAX) {
1769     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1770 
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   } else if (flag == MAT_GLOBAL_SUM) {
1777     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1778 
1779     info->nz_used      = irecv[0];
1780     info->nz_allocated = irecv[1];
1781     info->nz_unneeded  = irecv[2];
1782     info->memory       = irecv[3];
1783     info->mallocs      = irecv[4];
1784   }
1785   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1786   info->fill_ratio_needed = 0;
1787   info->factor_mallocs    = 0;
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1792 {
1793   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   switch (op) {
1798   case MAT_NEW_NONZERO_LOCATIONS:
1799   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1800   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1801   case MAT_KEEP_NONZERO_PATTERN:
1802   case MAT_NEW_NONZERO_LOCATION_ERR:
1803   case MAT_USE_INODES:
1804   case MAT_IGNORE_ZERO_ENTRIES:
1805     MatCheckPreallocated(A,1);
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_ROW_ORIENTED:
1810     MatCheckPreallocated(A,1);
1811     a->roworiented = flg;
1812 
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_NEW_DIAGONALS:
1817   case MAT_SORTED_FULL:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1824   case MAT_SPD:
1825   case MAT_SYMMETRIC:
1826   case MAT_STRUCTURALLY_SYMMETRIC:
1827   case MAT_HERMITIAN:
1828   case MAT_SYMMETRY_ETERNAL:
1829     break;
1830   case MAT_SUBMAT_SINGLEIS:
1831     A->submat_singleis = flg;
1832     break;
1833   case MAT_STRUCTURE_ONLY:
1834     /* The option is handled directly by MatSetOption() */
1835     break;
1836   default:
1837     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1838   }
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1843 {
1844   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1845   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1846   PetscErrorCode ierr;
1847   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1848   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1849   PetscInt       *cmap,*idx_p;
1850 
1851   PetscFunctionBegin;
1852   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1853   mat->getrowactive = PETSC_TRUE;
1854 
1855   if (!mat->rowvalues && (idx || v)) {
1856     /*
1857         allocate enough space to hold information from the longest row.
1858     */
1859     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1860     PetscInt   max = 1,tmp;
1861     for (i=0; i<matin->rmap->n; i++) {
1862       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1863       if (max < tmp) max = tmp;
1864     }
1865     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1866   }
1867 
1868   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1869   lrow = row - rstart;
1870 
1871   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1872   if (!v)   {pvA = 0; pvB = 0;}
1873   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1874   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1875   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1876   nztot = nzA + nzB;
1877 
1878   cmap = mat->garray;
1879   if (v  || idx) {
1880     if (nztot) {
1881       /* Sort by increasing column numbers, assuming A and B already sorted */
1882       PetscInt imark = -1;
1883       if (v) {
1884         *v = v_p = mat->rowvalues;
1885         for (i=0; i<nzB; i++) {
1886           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1887           else break;
1888         }
1889         imark = i;
1890         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1891         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1892       }
1893       if (idx) {
1894         *idx = idx_p = mat->rowindices;
1895         if (imark > -1) {
1896           for (i=0; i<imark; i++) {
1897             idx_p[i] = cmap[cworkB[i]];
1898           }
1899         } else {
1900           for (i=0; i<nzB; i++) {
1901             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1902             else break;
1903           }
1904           imark = i;
1905         }
1906         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1907         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1908       }
1909     } else {
1910       if (idx) *idx = 0;
1911       if (v)   *v   = 0;
1912     }
1913   }
1914   *nz  = nztot;
1915   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1916   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1921 {
1922   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1923 
1924   PetscFunctionBegin;
1925   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1926   aij->getrowactive = PETSC_FALSE;
1927   PetscFunctionReturn(0);
1928 }
1929 
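/*
   The norms are assembled from the local diagonal (A) and off-diagonal (B) blocks:
   NORM_FROBENIUS reduces the sum of |a_ij|^2 with MPI_SUM and takes the square root,
   NORM_1 reduces the per-column absolute sums and then takes the largest entry, and
   NORM_INFINITY reduces the local maximum absolute row sum with MPI_MAX.
*/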
1930 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1931 {
1932   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1933   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1934   PetscErrorCode ierr;
1935   PetscInt       i,j,cstart = mat->cmap->rstart;
1936   PetscReal      sum = 0.0;
1937   MatScalar      *v;
1938 
1939   PetscFunctionBegin;
1940   if (aij->size == 1) {
1941     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1942   } else {
1943     if (type == NORM_FROBENIUS) {
1944       v = amat->a;
1945       for (i=0; i<amat->nz; i++) {
1946         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1947       }
1948       v = bmat->a;
1949       for (i=0; i<bmat->nz; i++) {
1950         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1951       }
1952       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1953       *norm = PetscSqrtReal(*norm);
1954       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1955     } else if (type == NORM_1) { /* max column norm */
1956       PetscReal *tmp,*tmp2;
1957       PetscInt  *jj,*garray = aij->garray;
1958       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1959       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1960       *norm = 0.0;
1961       v     = amat->a; jj = amat->j;
1962       for (j=0; j<amat->nz; j++) {
1963         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1964       }
1965       v = bmat->a; jj = bmat->j;
1966       for (j=0; j<bmat->nz; j++) {
1967         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1968       }
1969       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1970       for (j=0; j<mat->cmap->N; j++) {
1971         if (tmp2[j] > *norm) *norm = tmp2[j];
1972       }
1973       ierr = PetscFree(tmp);CHKERRQ(ierr);
1974       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1975       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1976     } else if (type == NORM_INFINITY) { /* max row norm */
1977       PetscReal ntemp = 0.0;
1978       for (j=0; j<aij->A->rmap->n; j++) {
1979         v   = amat->a + amat->i[j];
1980         sum = 0.0;
1981         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1982           sum += PetscAbsScalar(*v); v++;
1983         }
1984         v = bmat->a + bmat->i[j];
1985         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1986           sum += PetscAbsScalar(*v); v++;
1987         }
1988         if (sum > ntemp) ntemp = sum;
1989       }
1990       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1991       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1992     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1998 {
1999   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2000   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2001   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2002   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2003   PetscErrorCode  ierr;
2004   Mat             B,A_diag,*B_diag;
2005   const MatScalar *array;
2006 
2007   PetscFunctionBegin;
2008   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2009   ai = Aloc->i; aj = Aloc->j;
2010   bi = Bloc->i; bj = Bloc->j;
2011   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2012     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2013     PetscSFNode          *oloc;
2014     PETSC_UNUSED PetscSF sf;
2015 
2016     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2017     /* compute d_nnz for preallocation */
2018     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2019     for (i=0; i<ai[ma]; i++) {
2020       d_nnz[aj[i]]++;
2021     }
2022     /* compute local off-diagonal contributions */
2023     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2024     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2025     /* map those to global */
2026     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2027     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2028     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2029     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2030     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2031     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2033 
2034     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2035     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2036     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2037     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2038     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2039     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2040   } else {
2041     B    = *matout;
2042     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2043   }
2044 
2045   b           = (Mat_MPIAIJ*)B->data;
2046   A_diag      = a->A;
2047   B_diag      = &b->A;
2048   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2049   A_diag_ncol = A_diag->cmap->N;
2050   B_diag_ilen = sub_B_diag->ilen;
2051   B_diag_i    = sub_B_diag->i;
2052 
2053   /* Set ilen for diagonal of B */
2054   for (i=0; i<A_diag_ncol; i++) {
2055     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2056   }
2057 
2058   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2059      very quickly (without using MatSetValues()) because all writes are local. */
2060   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2061 
2062   /* copy over the B part */
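  /* each off-diagonal entry (local row i, global column garray[bj]) of A becomes the
     entry (garray[bj], global row) of the transpose; those destinations generally live
     on other processes, hence the insertion goes through MatSetValues() */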
2063   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2064   array = Bloc->a;
2065   row   = A->rmap->rstart;
2066   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2067   cols_tmp = cols;
2068   for (i=0; i<mb; i++) {
2069     ncol = bi[i+1]-bi[i];
2070     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2071     row++;
2072     array += ncol; cols_tmp += ncol;
2073   }
2074   ierr = PetscFree(cols);CHKERRQ(ierr);
2075 
2076   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2077   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2079     *matout = B;
2080   } else {
2081     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2082   }
2083   PetscFunctionReturn(0);
2084 }
2085 
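/*
   Computes mat = diag(ll)*mat*diag(rr).  The diagonal block is scaled with the purely
   local parts of ll and rr; the off-diagonal block also needs rr at the ghost columns,
   which is gathered with the Mvctx scatter started early so that the communication
   overlaps the local scaling.
*/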
2086 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2087 {
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            a    = aij->A,b = aij->B;
2090   PetscErrorCode ierr;
2091   PetscInt       s1,s2,s3;
2092 
2093   PetscFunctionBegin;
2094   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2095   if (rr) {
2096     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2097     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2098     /* Overlap communication with computation. */
2099     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2100   }
2101   if (ll) {
2102     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2103     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2104     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2105   }
2106   /* scale the diagonal block */
2107   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2108 
2109   if (rr) {
2110     /* Do a scatter end and then right scale the off-diagonal block */
2111     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2112     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2113   }
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2118 {
2119   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2128 {
2129   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2130   Mat            a,b,c,d;
2131   PetscBool      flg;
2132   PetscErrorCode ierr;
2133 
2134   PetscFunctionBegin;
2135   a = matA->A; b = matA->B;
2136   c = matB->A; d = matB->B;
2137 
2138   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2139   if (flg) {
2140     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2141   }
2142   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2147 {
2148   PetscErrorCode ierr;
2149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2150   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2151 
2152   PetscFunctionBegin;
2153   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2154   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2155     /* because of the column compression in the off-processor part of the matrix a->B,
2156        the number of columns in a->B and b->B may be different, hence we cannot call
2157        MatCopy() directly on the two parts. If need be, a copy more efficient than
2158        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2159        then copying the submatrices */
2160     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2161   } else {
2162     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2163     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2164   }
2165   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2170 {
2171   PetscErrorCode ierr;
2172 
2173   PetscFunctionBegin;
2174   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 /*
2179    Computes the number of nonzeros per row needed for preallocation when X and Y
2180    have different nonzero structure.
2181 */
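/*
   For example: if a row of X has global columns {0,3,7} and the same row of Y has
   global columns {3,5}, the merged pattern is {0,3,5,7} and nnz for that row is 4.
*/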
2182 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2183 {
2184   PetscInt       i,j,k,nzx,nzy;
2185 
2186   PetscFunctionBegin;
2187   /* Set the number of nonzeros in the new matrix */
2188   for (i=0; i<m; i++) {
2189     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2190     nzx = xi[i+1] - xi[i];
2191     nzy = yi[i+1] - yi[i];
2192     nnz[i] = 0;
2193     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2194       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2195       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2196       nnz[i]++;
2197     }
2198     for (; k<nzy; k++) nnz[i]++;
2199   }
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2204 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2205 {
2206   PetscErrorCode ierr;
2207   PetscInt       m = Y->rmap->N;
2208   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2209   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2210 
2211   PetscFunctionBegin;
2212   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2213   PetscFunctionReturn(0);
2214 }
2215 
2216 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2217 {
2218   PetscErrorCode ierr;
2219   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2220   PetscBLASInt   bnz,one=1;
2221   Mat_SeqAIJ     *x,*y;
2222 
2223   PetscFunctionBegin;
2224   if (str == SAME_NONZERO_PATTERN) {
2225     PetscScalar alpha = a;
2226     x    = (Mat_SeqAIJ*)xx->A->data;
2227     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2228     y    = (Mat_SeqAIJ*)yy->A->data;
2229     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2230     x    = (Mat_SeqAIJ*)xx->B->data;
2231     y    = (Mat_SeqAIJ*)yy->B->data;
2232     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2233     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2234     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2235     /* the MatAXPY_Basic* subroutines call MatAssembly, so the matrix on the GPU
2236        will be updated */
2237 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2238     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2239       Y->offloadmask = PETSC_OFFLOAD_CPU;
2240     }
2241 #endif
2242   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2243     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2244   } else {
2245     Mat      B;
2246     PetscInt *nnz_d,*nnz_o;
2247     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2248     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2249     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2250     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2251     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2252     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2253     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2255     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2256     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2257     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2258     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2260     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2261   }
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2266 
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2283 {
2284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2289   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308   PetscInt       i,*idxb = 0;
2309   PetscScalar    *va,*vb;
2310   Vec            vtmp;
2311 
2312   PetscFunctionBegin;
2313   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2314   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2315   if (idx) {
2316     for (i=0; i<A->rmap->n; i++) {
2317       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2318     }
2319   }
2320 
2321   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2322   if (idx) {
2323     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2324   }
2325   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2326   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2327 
2328   for (i=0; i<A->rmap->n; i++) {
2329     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2330       va[i] = vb[i];
2331       if (idx) idx[i] = a->garray[idxb[i]];
2332     }
2333   }
2334 
2335   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2336   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2337   ierr = PetscFree(idxb);CHKERRQ(ierr);
2338   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2343 {
2344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2345   PetscErrorCode ierr;
2346   PetscInt       i,*idxb = 0;
2347   PetscScalar    *va,*vb;
2348   Vec            vtmp;
2349 
2350   PetscFunctionBegin;
2351   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2352   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2353   if (idx) {
2354     for (i=0; i<A->rmap->n; i++) {
2355       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2356     }
2357   }
2358 
2359   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2360   if (idx) {
2361     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2362   }
2363   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2364   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2365 
2366   for (i=0; i<A->rmap->n; i++) {
2367     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2368       va[i] = vb[i];
2369       if (idx) idx[i] = a->garray[idxb[i]];
2370     }
2371   }
2372 
2373   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2374   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2375   ierr = PetscFree(idxb);CHKERRQ(ierr);
2376   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2381 {
2382   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2383   PetscInt       n      = A->rmap->n;
2384   PetscInt       cstart = A->cmap->rstart;
2385   PetscInt       *cmap  = mat->garray;
2386   PetscInt       *diagIdx, *offdiagIdx;
2387   Vec            diagV, offdiagV;
2388   PetscScalar    *a, *diagA, *offdiagA;
2389   PetscInt       r;
2390   PetscErrorCode ierr;
2391 
2392   PetscFunctionBegin;
2393   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2394   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2395   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2396   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2397   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2398   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2399   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2400   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2401   for (r = 0; r < n; ++r) {
2402     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2403       a[r]   = diagA[r];
2404       idx[r] = cstart + diagIdx[r];
2405     } else {
2406       a[r]   = offdiagA[r];
2407       idx[r] = cmap[offdiagIdx[r]];
2408     }
2409   }
2410   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2411   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2412   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2413   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2414   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2415   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2420 {
2421   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2422   PetscInt       n      = A->rmap->n;
2423   PetscInt       cstart = A->cmap->rstart;
2424   PetscInt       *cmap  = mat->garray;
2425   PetscInt       *diagIdx, *offdiagIdx;
2426   Vec            diagV, offdiagV;
2427   PetscScalar    *a, *diagA, *offdiagA;
2428   PetscInt       r;
2429   PetscErrorCode ierr;
2430 
2431   PetscFunctionBegin;
2432   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2433   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2434   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2435   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2436   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2437   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2438   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2439   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2440   for (r = 0; r < n; ++r) {
2441     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2442       a[r]   = diagA[r];
2443       idx[r] = cstart + diagIdx[r];
2444     } else {
2445       a[r]   = offdiagA[r];
2446       idx[r] = cmap[offdiagIdx[r]];
2447     }
2448   }
2449   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2450   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2451   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2452   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2453   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2454   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2459 {
2460   PetscErrorCode ierr;
2461   Mat            *dummy;
2462 
2463   PetscFunctionBegin;
2464   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2465   *newmat = *dummy;
2466   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2471 {
2472   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2473   PetscErrorCode ierr;
2474 
2475   PetscFunctionBegin;
2476   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2477   A->factorerrortype = a->A->factorerrortype;
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2482 {
2483   PetscErrorCode ierr;
2484   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2485 
2486   PetscFunctionBegin;
2487   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2488   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2489   if (x->assembled) {
2490     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2491   } else {
2492     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2493   }
2494   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2495   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2500 {
2501   PetscFunctionBegin;
2502   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2503   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 /*@
2508    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2509 
2510    Collective on Mat
2511 
2512    Input Parameters:
2513 +    A - the matrix
2514 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2515 
2516    Level: advanced
2517 
2518 @*/
2519 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2520 {
2521   PetscErrorCode       ierr;
2522 
2523   PetscFunctionBegin;
2524   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2525   PetscFunctionReturn(0);
2526 }
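/*
   A minimal usage sketch, assuming A is an existing MATMPIAIJ matrix:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   The same switch is available from the options database as
   -mat_increase_overlap_scalable (see MatSetFromOptions_MPIAIJ() below).
*/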
2527 
2528 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2529 {
2530   PetscErrorCode       ierr;
2531   PetscBool            sc = PETSC_FALSE,flg;
2532 
2533   PetscFunctionBegin;
2534   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2535   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2536   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2537   if (flg) {
2538     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2539   }
2540   ierr = PetscOptionsTail();CHKERRQ(ierr);
2541   PetscFunctionReturn(0);
2542 }
2543 
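/*
   Y = Y + a*I.  If Y was never preallocated, or its diagonal block currently holds no
   nonzeros, a one-entry-per-row preallocation is set up first so that the diagonal
   insertions performed by MatShift_Basic() stay cheap.
*/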
2544 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2545 {
2546   PetscErrorCode ierr;
2547   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2548   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2549 
2550   PetscFunctionBegin;
2551   if (!Y->preallocated) {
2552     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2553   } else if (!aij->nz) {
2554     PetscInt nonew = aij->nonew;
2555     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2556     aij->nonew = nonew;
2557   }
2558   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2563 {
2564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2569   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2570   if (d) {
2571     PetscInt rstart;
2572     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2573     *d += rstart;
2574 
2575   }
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2580 {
2581   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2582   PetscErrorCode ierr;
2583 
2584   PetscFunctionBegin;
2585   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 /* -------------------------------------------------------------------*/
2590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2591                                        MatGetRow_MPIAIJ,
2592                                        MatRestoreRow_MPIAIJ,
2593                                        MatMult_MPIAIJ,
2594                                 /* 4*/ MatMultAdd_MPIAIJ,
2595                                        MatMultTranspose_MPIAIJ,
2596                                        MatMultTransposeAdd_MPIAIJ,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                 /*10*/ 0,
2601                                        0,
2602                                        0,
2603                                        MatSOR_MPIAIJ,
2604                                        MatTranspose_MPIAIJ,
2605                                 /*15*/ MatGetInfo_MPIAIJ,
2606                                        MatEqual_MPIAIJ,
2607                                        MatGetDiagonal_MPIAIJ,
2608                                        MatDiagonalScale_MPIAIJ,
2609                                        MatNorm_MPIAIJ,
2610                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2611                                        MatAssemblyEnd_MPIAIJ,
2612                                        MatSetOption_MPIAIJ,
2613                                        MatZeroEntries_MPIAIJ,
2614                                 /*24*/ MatZeroRows_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                 /*29*/ MatSetUp_MPIAIJ,
2620                                        0,
2621                                        0,
2622                                        MatGetDiagonalBlock_MPIAIJ,
2623                                        0,
2624                                 /*34*/ MatDuplicate_MPIAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*39*/ MatAXPY_MPIAIJ,
2630                                        MatCreateSubMatrices_MPIAIJ,
2631                                        MatIncreaseOverlap_MPIAIJ,
2632                                        MatGetValues_MPIAIJ,
2633                                        MatCopy_MPIAIJ,
2634                                 /*44*/ MatGetRowMax_MPIAIJ,
2635                                        MatScale_MPIAIJ,
2636                                        MatShift_MPIAIJ,
2637                                        MatDiagonalSet_MPIAIJ,
2638                                        MatZeroRowsColumns_MPIAIJ,
2639                                 /*49*/ MatSetRandom_MPIAIJ,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2645                                        0,
2646                                        MatSetUnfactored_MPIAIJ,
2647                                        MatPermute_MPIAIJ,
2648                                        0,
2649                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2650                                        MatDestroy_MPIAIJ,
2651                                        MatView_MPIAIJ,
2652                                        0,
2653                                        0,
2654                                 /*64*/ 0,
2655                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2660                                        MatGetRowMinAbs_MPIAIJ,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*75*/ MatFDColoringApply_AIJ,
2666                                        MatSetFromOptions_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                        MatFindZeroDiagonals_MPIAIJ,
2670                                 /*80*/ 0,
2671                                        0,
2672                                        0,
2673                                 /*83*/ MatLoad_MPIAIJ,
2674                                        MatIsSymmetric_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                 /*89*/ 0,
2680                                        0,
2681                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2682                                        0,
2683                                        0,
2684                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        MatBindToCPU_MPIAIJ,
2689                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2690                                        0,
2691                                        0,
2692                                        MatConjugate_MPIAIJ,
2693                                        0,
2694                                 /*104*/MatSetValuesRow_MPIAIJ,
2695                                        MatRealPart_MPIAIJ,
2696                                        MatImaginaryPart_MPIAIJ,
2697                                        0,
2698                                        0,
2699                                 /*109*/0,
2700                                        0,
2701                                        MatGetRowMin_MPIAIJ,
2702                                        0,
2703                                        MatMissingDiagonal_MPIAIJ,
2704                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2705                                        0,
2706                                        MatGetGhosts_MPIAIJ,
2707                                        0,
2708                                        0,
2709                                 /*119*/0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                        MatGetMultiProcBlock_MPIAIJ,
2714                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2715                                        MatGetColumnNorms_MPIAIJ,
2716                                        MatInvertBlockDiagonal_MPIAIJ,
2717                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2718                                        MatCreateSubMatricesMPI_MPIAIJ,
2719                                 /*129*/0,
2720                                        0,
2721                                        0,
2722                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2723                                        0,
2724                                 /*134*/0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                 /*139*/MatSetBlockSizes_MPIAIJ,
2730                                        0,
2731                                        0,
2732                                        MatFDColoringSetUp_MPIXAIJ,
2733                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2734                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2735                                 /*145*/0,
2736                                        0,
2737                                        0
2738 };
2739 
2740 /* ----------------------------------------------------------------------------------------*/
2741 
2742 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2743 {
2744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2749   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2754 {
2755   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2756   PetscErrorCode ierr;
2757 
2758   PetscFunctionBegin;
2759   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2760   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2765 {
2766   Mat_MPIAIJ     *b;
2767   PetscErrorCode ierr;
2768   PetscMPIInt    size;
2769 
2770   PetscFunctionBegin;
2771   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2773   b = (Mat_MPIAIJ*)B->data;
2774 
2775 #if defined(PETSC_USE_CTABLE)
2776   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2777 #else
2778   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2779 #endif
2780   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2781   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2782   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2783 
2784   /* Because B may have been resized we simply destroy it and create a new one each time */
2785   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2786   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2787   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2788   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2789   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2790   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2791   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2792 
2793   if (!B->preallocated) {
2794     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2795     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2796     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2797     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2798     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2799   }
2800 
2801   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2802   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2803   B->preallocated  = PETSC_TRUE;
2804   B->was_assembled = PETSC_FALSE;
2805   B->assembled     = PETSC_FALSE;
2806   PetscFunctionReturn(0);
2807 }
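/*
   Illustrative sketch (not part of the PETSc source): typical creation and preallocation
   of a MATMPIAIJ matrix, which reaches the routine above through the public
   MatMPIAIJSetPreallocation() interface. The global size 100 x 100 and the per-row
   estimates (5 diagonal-block and 2 off-diagonal-block nonzeros) are assumptions for
   the example.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/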
2808 
2809 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2810 {
2811   Mat_MPIAIJ     *b;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2816   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2817   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2818   b = (Mat_MPIAIJ*)B->data;
2819 
2820 #if defined(PETSC_USE_CTABLE)
2821   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2822 #else
2823   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2824 #endif
2825   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2826   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2827   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2828 
2829   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2830   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2831   B->preallocated  = PETSC_TRUE;
2832   B->was_assembled = PETSC_FALSE;
2833   B->assembled = PETSC_FALSE;
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2838 {
2839   Mat            mat;
2840   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2841   PetscErrorCode ierr;
2842 
2843   PetscFunctionBegin;
2844   *newmat = 0;
2845   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2846   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2847   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2848   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2849   a       = (Mat_MPIAIJ*)mat->data;
2850 
2851   mat->factortype   = matin->factortype;
2852   mat->assembled    = matin->assembled;
2853   mat->insertmode   = NOT_SET_VALUES;
2854   mat->preallocated = matin->preallocated;
2855 
2856   a->size         = oldmat->size;
2857   a->rank         = oldmat->rank;
2858   a->donotstash   = oldmat->donotstash;
2859   a->roworiented  = oldmat->roworiented;
2860   a->rowindices   = NULL;
2861   a->rowvalues    = NULL;
2862   a->getrowactive = PETSC_FALSE;
2863 
2864   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2865   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2866 
2867   if (oldmat->colmap) {
2868 #if defined(PETSC_USE_CTABLE)
2869     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2870 #else
2871     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2872     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2873     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2874 #endif
2875   } else a->colmap = NULL;
2876   if (oldmat->garray) {
2877     PetscInt len;
2878     len  = oldmat->B->cmap->n;
2879     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2880     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2881     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2882   } else a->garray = NULL;
2883 
2884   /* MatDuplicate() may be called with a non-assembled matrix;
2885      it only requires the matrix to be preallocated.
2886      This can happen, for example, inside DMCreateMatrix_Shell */
2887   if (oldmat->lvec) {
2888     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2889     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2890   }
2891   if (oldmat->Mvctx) {
2892     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2893     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2894   }
2895   if (oldmat->Mvctx_mpi1) {
2896     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2897     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2898   }
2899 
2900   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2901   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2902   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2903   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2904   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2905   *newmat = mat;
2906   PetscFunctionReturn(0);
2907 }
2908 
2909 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2910 {
2911   PetscBool      isbinary, ishdf5;
2912   PetscErrorCode ierr;
2913 
2914   PetscFunctionBegin;
2915   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2916   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2917   /* force binary viewer to load .info file if it has not yet done so */
2918   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2919   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2920   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2921   if (isbinary) {
2922     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2923   } else if (ishdf5) {
2924 #if defined(PETSC_HAVE_HDF5)
2925     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2926 #else
2927     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2928 #endif
2929   } else {
2930     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2931   }
2932   PetscFunctionReturn(0);
2933 }
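/*
   Illustrative sketch (not part of the PETSc source): loading a MATMPIAIJ matrix from a
   PETSc binary file, which dispatches to MatLoad_MPIAIJ_Binary() below. The file name
   "matrix.dat" is an assumption for the example.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/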
2934 
2935 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2936 {
2937   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2938   PetscInt       *rowidxs,*colidxs;
2939   PetscScalar    *matvals;
2940   PetscErrorCode ierr;
2941 
2942   PetscFunctionBegin;
2943   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2944 
2945   /* read in matrix header */
2946   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2947   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2948   M  = header[1]; N = header[2]; nz = header[3];
2949   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2950   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2951   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2952 
2953   /* set block sizes from the viewer's .info file */
2954   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2955   /* set global sizes if not set already */
2956   if (mat->rmap->N < 0) mat->rmap->N = M;
2957   if (mat->cmap->N < 0) mat->cmap->N = N;
2958   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2960 
2961   /* check if the matrix sizes are correct */
2962   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2963   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2964 
2965   /* read in row lengths and build row indices */
2966   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2967   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2968   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
2969   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2970   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2971   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
2972   /* read in column indices and matrix values */
2973   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2974   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2975   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2976   /* store matrix indices and values */
2977   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2978   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2979   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
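/*
   Illustrative worked example (not part of the PETSc source): the loop above converts the
   per-row nonzero counts read from the file into CSR row offsets by a running sum. For
   instance, with local row lengths {2,0,3} the result is

     rowidxs = {0,2,2,5}

   so that row i occupies colidxs[rowidxs[i]] .. colidxs[rowidxs[i+1]-1].
*/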
2982 
2983 /* Not scalable because of ISAllGather() unless getting all columns. */
2984 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2985 {
2986   PetscErrorCode ierr;
2987   IS             iscol_local;
2988   PetscBool      isstride;
2989   PetscMPIInt    lisstride=0,gisstride;
2990 
2991   PetscFunctionBegin;
2992   /* check if we are grabbing all columns*/
2993   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2994 
2995   if (isstride) {
2996     PetscInt  start,len,mstart,mlen;
2997     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2998     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2999     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3000     if (mstart == start && mlen-mstart == len) lisstride = 1;
3001   }
3002 
3003   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3004   if (gisstride) {
3005     PetscInt N;
3006     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3007     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3008     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3009     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3010   } else {
3011     PetscInt cbs;
3012     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3013     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3014     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3015   }
3016 
3017   *isseq = iscol_local;
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 /*
3022  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3023  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3024 
3025  Input Parameters:
3026    mat - matrix
3027    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3028            i.e., mat->rstart <= isrow[i] < mat->rend
3029    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3030            i.e., mat->cstart <= iscol[i] < mat->cend
3031  Output Parameter:
3032    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3033    iscol_o - sequential column index set for retrieving mat->B
3034    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3035  */
3036 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3037 {
3038   PetscErrorCode ierr;
3039   Vec            x,cmap;
3040   const PetscInt *is_idx;
3041   PetscScalar    *xarray,*cmaparray;
3042   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3043   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3044   Mat            B=a->B;
3045   Vec            lvec=a->lvec,lcmap;
3046   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3047   MPI_Comm       comm;
3048   VecScatter     Mvctx=a->Mvctx;
3049 
3050   PetscFunctionBegin;
3051   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3052   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3053 
3054   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3055   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3056   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3057   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3058   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3059 
3060   /* Get start indices */
3061   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3062   isstart -= ncols;
3063   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3064 
3065   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3066   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3067   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3068   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3069   for (i=0; i<ncols; i++) {
3070     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3071     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3072     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3073   }
3074   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3075   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3076   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3077 
3078   /* Get iscol_d */
3079   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3080   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3081   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3082 
3083   /* Get isrow_d */
3084   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3085   rstart = mat->rmap->rstart;
3086   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3087   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3088   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3089   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3090 
3091   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3092   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3093   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3094 
3095   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3096   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3097   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3098 
3099   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3100 
3101   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3102   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3103 
3104   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3105   /* off-process column indices */
3106   count = 0;
3107   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3108   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3109 
3110   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3111   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3112   for (i=0; i<Bn; i++) {
3113     if (PetscRealPart(xarray[i]) > -1.0) {
3114       idx[count]     = i;                   /* local column index in off-diagonal part B */
3115       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3116       count++;
3117     }
3118   }
3119   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3120   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3121 
3122   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3123   /* cannot ensure iscol_o has same blocksize as iscol! */
3124 
3125   ierr = PetscFree(idx);CHKERRQ(ierr);
3126   *garray = cmap1;
3127 
3128   ierr = VecDestroy(&x);CHKERRQ(ierr);
3129   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3130   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3131   PetscFunctionReturn(0);
3132 }
3133 
3134 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3135 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3136 {
3137   PetscErrorCode ierr;
3138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3139   Mat            M = NULL;
3140   MPI_Comm       comm;
3141   IS             iscol_d,isrow_d,iscol_o;
3142   Mat            Asub = NULL,Bsub = NULL;
3143   PetscInt       n;
3144 
3145   PetscFunctionBegin;
3146   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3147 
3148   if (call == MAT_REUSE_MATRIX) {
3149     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3150     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3151     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3152 
3153     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3154     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3155 
3156     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3157     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3158 
3159     /* Update diagonal and off-diagonal portions of submat */
3160     asub = (Mat_MPIAIJ*)(*submat)->data;
3161     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3162     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3163     if (n) {
3164       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3165     }
3166     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3167     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3168 
3169   } else { /* call == MAT_INITIAL_MATRIX */
3170     const PetscInt *garray;
3171     PetscInt        BsubN;
3172 
3173     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3174     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3175 
3176     /* Create local submatrices Asub and Bsub */
3177     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3178     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3179 
3180     /* Create submatrix M */
3181     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3182 
3183     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3184     asub = (Mat_MPIAIJ*)M->data;
3185 
3186     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3187     n = asub->B->cmap->N;
3188     if (BsubN > n) {
3189       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3190       const PetscInt *idx;
3191       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3192       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3193 
3194       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3195       j = 0;
3196       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3197       for (i=0; i<n; i++) {
3198         if (j >= BsubN) break;
3199         while (subgarray[i] > garray[j]) j++;
3200 
3201         if (subgarray[i] == garray[j]) {
3202           idx_new[i] = idx[j++];
3203         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3204       }
3205       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3206 
3207       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3208       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3209 
3210     } else if (BsubN < n) {
3211       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3212     }
3213 
3214     ierr = PetscFree(garray);CHKERRQ(ierr);
3215     *submat = M;
3216 
3217     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3218     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3219     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3220 
3221     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3222     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3223 
3224     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3225     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3226   }
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3231 {
3232   PetscErrorCode ierr;
3233   IS             iscol_local=NULL,isrow_d;
3234   PetscInt       csize;
3235   PetscInt       n,i,j,start,end;
3236   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3237   MPI_Comm       comm;
3238 
3239   PetscFunctionBegin;
3240   /* If isrow has same processor distribution as mat,
3241      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3242   if (call == MAT_REUSE_MATRIX) {
3243     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3244     if (isrow_d) {
3245       sameRowDist  = PETSC_TRUE;
3246       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3247     } else {
3248       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3249       if (iscol_local) {
3250         sameRowDist  = PETSC_TRUE;
3251         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3252       }
3253     }
3254   } else {
3255     /* Check if isrow has same processor distribution as mat */
3256     sameDist[0] = PETSC_FALSE;
3257     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3258     if (!n) {
3259       sameDist[0] = PETSC_TRUE;
3260     } else {
3261       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3262       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3263       if (i >= start && j < end) {
3264         sameDist[0] = PETSC_TRUE;
3265       }
3266     }
3267 
3268     /* Check if iscol has same processor distribution as mat */
3269     sameDist[1] = PETSC_FALSE;
3270     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3271     if (!n) {
3272       sameDist[1] = PETSC_TRUE;
3273     } else {
3274       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3275       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3276       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3277     }
3278 
3279     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3280     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3281     sameRowDist = tsameDist[0];
3282   }
3283 
3284   if (sameRowDist) {
3285     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3286       /* isrow and iscol have same processor distribution as mat */
3287       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3288       PetscFunctionReturn(0);
3289     } else { /* sameRowDist */
3290       /* isrow has same processor distribution as mat */
3291       if (call == MAT_INITIAL_MATRIX) {
3292         PetscBool sorted;
3293         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3294         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3295         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3296         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3297 
3298         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3299         if (sorted) {
3300           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3301           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3302           PetscFunctionReturn(0);
3303         }
3304       } else { /* call == MAT_REUSE_MATRIX */
3305         IS    iscol_sub;
3306         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3307         if (iscol_sub) {
3308           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3309           PetscFunctionReturn(0);
3310         }
3311       }
3312     }
3313   }
3314 
3315   /* General case: iscol -> iscol_local which has global size of iscol */
3316   if (call == MAT_REUSE_MATRIX) {
3317     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3318     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3319   } else {
3320     if (!iscol_local) {
3321       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3322     }
3323   }
3324 
3325   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3326   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3327 
3328   if (call == MAT_INITIAL_MATRIX) {
3329     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3330     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3331   }
3332   PetscFunctionReturn(0);
3333 }
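/*
   Illustrative sketch (not part of the PETSc source): extracting a parallel submatrix
   through the public MatCreateSubMatrix() interface, which dispatches to the routine
   above for MATMPIAIJ. Here, as an assumed example, each process keeps its locally owned
   rows and columns of A, so the index sets have the same distribution as A and the
   SameRowColDist fast path above is taken; in practice the index sets would select the
   rows and columns of interest.

     IS       isrow,iscol;
     Mat      S;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/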
3334 
3335 /*@C
3336      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3337          and "off-diagonal" part of the matrix in CSR format.
3338 
3339    Collective
3340 
3341    Input Parameters:
3342 +  comm - MPI communicator
3343 .  A - "diagonal" portion of matrix
3344 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3345 -  garray - global index of B columns
3346 
3347    Output Parameter:
3348 .   mat - the matrix, with input A as its local diagonal matrix
3349    Level: advanced
3350 
3351    Notes:
3352        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3353        A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B afterwards.
3354 
3355 .seealso: MatCreateMPIAIJWithSplitArrays()
3356 @*/
3357 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3358 {
3359   PetscErrorCode ierr;
3360   Mat_MPIAIJ     *maij;
3361   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3362   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3363   PetscScalar    *oa=b->a;
3364   Mat            Bnew;
3365   PetscInt       m,n,N;
3366 
3367   PetscFunctionBegin;
3368   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3369   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3370   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3371   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3372   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3373   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3374 
3375   /* Get global columns of mat */
3376   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3377 
3378   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3379   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3380   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3381   maij = (Mat_MPIAIJ*)(*mat)->data;
3382 
3383   (*mat)->preallocated = PETSC_TRUE;
3384 
3385   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3386   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3387 
3388   /* Set A as diagonal portion of *mat */
3389   maij->A = A;
3390 
3391   nz = oi[m];
3392   for (i=0; i<nz; i++) {
3393     col   = oj[i];
3394     oj[i] = garray[col];
3395   }
3396 
3397    /* Set Bnew as off-diagonal portion of *mat */
3398   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3399   bnew        = (Mat_SeqAIJ*)Bnew->data;
3400   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3401   maij->B     = Bnew;
3402 
3403   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3404 
3405   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3406   b->free_a       = PETSC_FALSE;
3407   b->free_ij      = PETSC_FALSE;
3408   ierr = MatDestroy(&B);CHKERRQ(ierr);
3409 
3410   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3411   bnew->free_a       = PETSC_TRUE;
3412   bnew->free_ij      = PETSC_TRUE;
3413 
3414   /* condense columns of maij->B */
3415   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3416   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3417   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3418   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3419   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3424 
3425 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3426 {
3427   PetscErrorCode ierr;
3428   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3429   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3430   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3431   Mat            M,Msub,B=a->B;
3432   MatScalar      *aa;
3433   Mat_SeqAIJ     *aij;
3434   PetscInt       *garray = a->garray,*colsub,Ncols;
3435   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3436   IS             iscol_sub,iscmap;
3437   const PetscInt *is_idx,*cmap;
3438   PetscBool      allcolumns=PETSC_FALSE;
3439   MPI_Comm       comm;
3440 
3441   PetscFunctionBegin;
3442   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3443 
3444   if (call == MAT_REUSE_MATRIX) {
3445     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3446     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3447     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3448 
3449     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3450     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3451 
3452     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3453     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3454 
3455     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3456 
3457   } else { /* call == MAT_INITIAL_MATRIX */
3458     PetscBool flg;
3459 
3460     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3461     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3462 
3463     /* (1) iscol -> nonscalable iscol_local */
3464     /* Check for special case: each processor gets entire matrix columns */
3465     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3466     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3467     if (allcolumns) {
3468       iscol_sub = iscol_local;
3469       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3470       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3471 
3472     } else {
3473       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3474       PetscInt *idx,*cmap1,k;
3475       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3476       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3477       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3478       count = 0;
3479       k     = 0;
3480       for (i=0; i<Ncols; i++) {
3481         j = is_idx[i];
3482         if (j >= cstart && j < cend) {
3483           /* diagonal part of mat */
3484           idx[count]     = j;
3485           cmap1[count++] = i; /* column index in submat */
3486         } else if (Bn) {
3487           /* off-diagonal part of mat */
3488           if (j == garray[k]) {
3489             idx[count]     = j;
3490             cmap1[count++] = i;  /* column index in submat */
3491           } else if (j > garray[k]) {
3492             while (j > garray[k] && k < Bn-1) k++;
3493             if (j == garray[k]) {
3494               idx[count]     = j;
3495               cmap1[count++] = i; /* column index in submat */
3496             }
3497           }
3498         }
3499       }
3500       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3501 
3502       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3503       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3504       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3505 
3506       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3507     }
3508 
3509     /* (3) Create sequential Msub */
3510     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3511   }
3512 
3513   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3514   aij  = (Mat_SeqAIJ*)(Msub)->data;
3515   ii   = aij->i;
3516   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3517 
3518   /*
3519       m - number of local rows
3520       Ncols - number of columns (same on all processors)
3521       rstart - first row in new global matrix generated
3522   */
3523   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3524 
3525   if (call == MAT_INITIAL_MATRIX) {
3526     /* (4) Create parallel newmat */
3527     PetscMPIInt    rank,size;
3528     PetscInt       csize;
3529 
3530     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3531     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3532 
3533     /*
3534         Determine the number of non-zeros in the diagonal and off-diagonal
3535         portions of the matrix in order to do correct preallocation
3536     */
3537 
3538     /* first get start and end of "diagonal" columns */
3539     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3540     if (csize == PETSC_DECIDE) {
3541       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3542       if (mglobal == Ncols) { /* square matrix */
3543         nlocal = m;
3544       } else {
3545         nlocal = Ncols/size + ((Ncols % size) > rank);
3546       }
3547     } else {
3548       nlocal = csize;
3549     }
3550     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3551     rstart = rend - nlocal;
3552     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3553 
3554     /* next, compute all the lengths */
3555     jj    = aij->j;
3556     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3557     olens = dlens + m;
3558     for (i=0; i<m; i++) {
3559       jend = ii[i+1] - ii[i];
3560       olen = 0;
3561       dlen = 0;
3562       for (j=0; j<jend; j++) {
3563         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3564         else dlen++;
3565         jj++;
3566       }
3567       olens[i] = olen;
3568       dlens[i] = dlen;
3569     }
3570 
3571     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3572     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3573 
3574     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3575     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3576     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3577     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3578     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3579     ierr = PetscFree(dlens);CHKERRQ(ierr);
3580 
3581   } else { /* call == MAT_REUSE_MATRIX */
3582     M    = *newmat;
3583     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3584     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3585     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3586     /*
3587          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3588        rather than the slower MatSetValues().
3589     */
3590     M->was_assembled = PETSC_TRUE;
3591     M->assembled     = PETSC_FALSE;
3592   }
3593 
3594   /* (5) Set values of Msub to *newmat */
3595   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3596   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3597 
3598   jj   = aij->j;
3599   aa   = aij->a;
3600   for (i=0; i<m; i++) {
3601     row = rstart + i;
3602     nz  = ii[i+1] - ii[i];
3603     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3604     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3605     jj += nz; aa += nz;
3606   }
3607   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3608 
3609   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611 
3612   ierr = PetscFree(colsub);CHKERRQ(ierr);
3613 
3614   /* save Msub, iscol_sub and iscmap used in processor for next request */
3615   if (call ==  MAT_INITIAL_MATRIX) {
3616     *newmat = M;
3617     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3618     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3619 
3620     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3621     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3622 
3623     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3624     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3625 
3626     if (iscol_local) {
3627       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3628       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3629     }
3630   }
3631   PetscFunctionReturn(0);
3632 }
3633 
3634 /*
3635     Not great since it makes two copies of the submatrix: first a SeqAIJ
3636   locally, and then the final result by concatenating the local matrices.
3637   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3638 
3639   Note: This requires a sequential iscol with all indices.
3640 */
3641 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3642 {
3643   PetscErrorCode ierr;
3644   PetscMPIInt    rank,size;
3645   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3646   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3647   Mat            M,Mreuse;
3648   MatScalar      *aa,*vwork;
3649   MPI_Comm       comm;
3650   Mat_SeqAIJ     *aij;
3651   PetscBool      colflag,allcolumns=PETSC_FALSE;
3652 
3653   PetscFunctionBegin;
3654   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3655   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3657 
3658   /* Check for special case: each processor gets entire matrix columns */
3659   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3660   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3661   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3662 
3663   if (call ==  MAT_REUSE_MATRIX) {
3664     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3665     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3666     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3667   } else {
3668     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3669   }
3670 
3671   /*
3672       m - number of local rows
3673       n - number of columns (same on all processors)
3674       rstart - first row in new global matrix generated
3675   */
3676   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3677   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3678   if (call == MAT_INITIAL_MATRIX) {
3679     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3680     ii  = aij->i;
3681     jj  = aij->j;
3682 
3683     /*
3684         Determine the number of non-zeros in the diagonal and off-diagonal
3685         portions of the matrix in order to do correct preallocation
3686     */
3687 
3688     /* first get start and end of "diagonal" columns */
3689     if (csize == PETSC_DECIDE) {
3690       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3691       if (mglobal == n) { /* square matrix */
3692         nlocal = m;
3693       } else {
3694         nlocal = n/size + ((n % size) > rank);
3695       }
3696     } else {
3697       nlocal = csize;
3698     }
3699     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3700     rstart = rend - nlocal;
3701     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3702 
3703     /* next, compute all the lengths */
3704     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3705     olens = dlens + m;
3706     for (i=0; i<m; i++) {
3707       jend = ii[i+1] - ii[i];
3708       olen = 0;
3709       dlen = 0;
3710       for (j=0; j<jend; j++) {
3711         if (*jj < rstart || *jj >= rend) olen++;
3712         else dlen++;
3713         jj++;
3714       }
3715       olens[i] = olen;
3716       dlens[i] = dlen;
3717     }
3718     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3719     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3720     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3721     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3722     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3723     ierr = PetscFree(dlens);CHKERRQ(ierr);
3724   } else {
3725     PetscInt ml,nl;
3726 
3727     M    = *newmat;
3728     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3729     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3730     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3731     /*
3732          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3733        rather than the slower MatSetValues().
3734     */
3735     M->was_assembled = PETSC_TRUE;
3736     M->assembled     = PETSC_FALSE;
3737   }
3738   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3739   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3740   ii   = aij->i;
3741   jj   = aij->j;
3742   aa   = aij->a;
3743   for (i=0; i<m; i++) {
3744     row   = rstart + i;
3745     nz    = ii[i+1] - ii[i];
3746     cwork = jj;     jj += nz;
3747     vwork = aa;     aa += nz;
3748     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3749   }
3750 
3751   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753   *newmat = M;
3754 
3755   /* save submatrix used in processor for next request */
3756   if (call ==  MAT_INITIAL_MATRIX) {
3757     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3758     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3759   }
3760   PetscFunctionReturn(0);
3761 }
3762 
3763 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3764 {
3765   PetscInt       m,cstart, cend,j,nnz,i,d;
3766   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3767   const PetscInt *JJ;
3768   PetscErrorCode ierr;
3769   PetscBool      nooffprocentries;
3770 
3771   PetscFunctionBegin;
3772   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3773 
3774   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3775   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3776   m      = B->rmap->n;
3777   cstart = B->cmap->rstart;
3778   cend   = B->cmap->rend;
3779   rstart = B->rmap->rstart;
3780 
3781   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3782 
3783 #if defined(PETSC_USE_DEBUG)
3784   for (i=0; i<m; i++) {
3785     nnz = Ii[i+1]- Ii[i];
3786     JJ  = J + Ii[i];
3787     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3788     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3789     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3790   }
3791 #endif
3792 
3793   for (i=0; i<m; i++) {
3794     nnz     = Ii[i+1]- Ii[i];
3795     JJ      = J + Ii[i];
3796     nnz_max = PetscMax(nnz_max,nnz);
3797     d       = 0;
3798     for (j=0; j<nnz; j++) {
3799       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3800     }
3801     d_nnz[i] = d;
3802     o_nnz[i] = nnz - d;
3803   }
3804   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3805   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3806 
3807   for (i=0; i<m; i++) {
3808     ii   = i + rstart;
3809     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3810   }
3811   nooffprocentries    = B->nooffprocentries;
3812   B->nooffprocentries = PETSC_TRUE;
3813   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3814   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3815   B->nooffprocentries = nooffprocentries;
3816 
3817   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3818   PetscFunctionReturn(0);
3819 }
3820 
3821 /*@
3822    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3823    (the default parallel PETSc format).
3824 
3825    Collective
3826 
3827    Input Parameters:
3828 +  B - the matrix
3829 .  i - the indices into j for the start of each local row (starts with zero)
3830 .  j - the column indices for each local row (starts with zero)
3831 -  v - optional values in the matrix
3832 
3833    Level: developer
3834 
3835    Notes:
3836        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3837      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3838      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3839 
3840        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3841 
3842        The format used for the sparse matrix input is equivalent to a
3843     row-major ordering, i.e., for the following matrix, the input data expected is
3844     as shown below
3845 
3846 $        1 0 0
3847 $        2 0 3     P0
3848 $       -------
3849 $        4 5 6     P1
3850 $
3851 $     Process0 [P0]: rows_owned=[0,1]
3852 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3853 $        j =  {0,0,2}  [size = 3]
3854 $        v =  {1,2,3}  [size = 3]
3855 $
3856 $     Process1 [P1]: rows_owned=[2]
3857 $        i =  {0,3}    [size = nrow+1  = 1+1]
3858 $        j =  {0,1,2}  [size = 3]
3859 $        v =  {4,5,6}  [size = 3]
3860 
3861 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3862           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3863 @*/
3864 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3865 {
3866   PetscErrorCode ierr;
3867 
3868   PetscFunctionBegin;
3869   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3870   PetscFunctionReturn(0);
3871 }
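
/*
   A usage sketch for MatMPIAIJSetPreallocationCSR() on two processes, with the CSR triplets for the 3x3
   example in the man page above; the variable names and communicator are illustrative only.

     // process 0 (rows 0-1):  i[] = {0,1,3};  j[] = {0,0,2};  v[] = {1.0,2.0,3.0};  mlocal = 2;
     // process 1 (row 2):     i[] = {0,3};    j[] = {0,1,2};  v[] = {4.0,5.0,6.0};  mlocal = 1;

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,mlocal,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);   // copies i,j,v and assembles B
*/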
3872 
3873 /*@C
3874    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3875    (the default parallel PETSc format).  For good matrix assembly performance
3876    the user should preallocate the matrix storage by setting the parameters
3877    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3878    performance can be increased by more than a factor of 50.
3879 
3880    Collective
3881 
3882    Input Parameters:
3883 +  B - the matrix
3884 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3885            (same value is used for all local rows)
3886 .  d_nnz - array containing the number of nonzeros in the various rows of the
3887            DIAGONAL portion of the local submatrix (possibly different for each row)
3888            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3889            The size of this array is equal to the number of local rows, i.e 'm'.
3890            For matrices that will be factored, you must leave room for (and set)
3891            the diagonal entry even if it is zero.
3892 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3893            submatrix (same value is used for all local rows).
3894 -  o_nnz - array containing the number of nonzeros in the various rows of the
3895            OFF-DIAGONAL portion of the local submatrix (possibly different for
3896            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3897            structure. The size of this array is equal to the number
3898            of local rows, i.e 'm'.
3899 
3900    If the *_nnz parameter is given then the *_nz parameter is ignored
3901 
3902    The AIJ format (also called the Yale sparse matrix format or
3903    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3904    storage.  The stored row and column indices begin with zero.
3905    See Users-Manual: ch_mat for details.
3906 
3907    The parallel matrix is partitioned such that the first m0 rows belong to
3908    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3909    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3910 
3911    The DIAGONAL portion of the local submatrix of a processor can be defined
3912    as the submatrix which is obtained by extracting the part corresponding to
3913    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3914    first row that belongs to the processor, r2 is the last row belonging to
3915    this processor, and c1-c2 is the range of indices of the local part of a
3916    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3917    common case of a square matrix, the row and column ranges are the same and
3918    the DIAGONAL part is also square. The remaining portion of the local
3919    submatrix (of size m x (N-n)) constitutes the OFF-DIAGONAL portion.
3920 
3921    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3922 
3923    You can call MatGetInfo() to get information on how effective the preallocation was;
3924    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3925    You can also run with the option -info and look for messages with the string
3926    malloc in them to see if additional memory allocation was needed.
3927 
3928    Example usage:
3929 
3930    Consider the following 8x8 matrix with 34 non-zero values that is
3931    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3932    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3933    as follows:
3934 
3935 .vb
3936             1  2  0  |  0  3  0  |  0  4
3937     Proc0   0  5  6  |  7  0  0  |  8  0
3938             9  0 10  | 11  0  0  | 12  0
3939     -------------------------------------
3940            13  0 14  | 15 16 17  |  0  0
3941     Proc1   0 18  0  | 19 20 21  |  0  0
3942             0  0  0  | 22 23  0  | 24  0
3943     -------------------------------------
3944     Proc2  25 26 27  |  0  0 28  | 29  0
3945            30  0  0  | 31 32 33  |  0 34
3946 .ve
3947 
3948    This can be represented as a collection of submatrices as:
3949 
3950 .vb
3951       A B C
3952       D E F
3953       G H I
3954 .ve
3955 
3956    Where the submatrices A,B,C are owned by proc0, D,E,F are
3957    owned by proc1, G,H,I are owned by proc2.
3958 
3959    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3960    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3961    The 'M','N' parameters are 8,8, and have the same values on all procs.
3962 
3963    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3964    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3965    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3966    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3967    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3968    matrix, and [DF] as another SeqAIJ matrix.
3969 
3970    When d_nz, o_nz parameters are specified, d_nz storage elements are
3971    allocated for every row of the local diagonal submatrix, and o_nz
3972    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3973    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3974    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3975    In this case, the values of d_nz,o_nz are:
3976 .vb
3977      proc0 : dnz = 2, o_nz = 2
3978      proc1 : dnz = 3, o_nz = 2
3979      proc2 : dnz = 1, o_nz = 4
3980 .ve
3981    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3982    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3983    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3984    34 values.
3985 
3986    When d_nnz, o_nnz parameters are specified, the storage is specified
3987    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3988    In the above case the values for d_nnz,o_nnz are:
3989 .vb
3990      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3991      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3992      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3993 .ve
3994    Here the space allocated is the sum of all the above values, i.e., 34, and
3995    hence the preallocation is perfect.
3996 
3997    Level: intermediate
3998 
3999 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4000           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4001 @*/
4002 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4003 {
4004   PetscErrorCode ierr;
4005 
4006   PetscFunctionBegin;
4007   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4008   PetscValidType(B,1);
4009   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
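
/*
   A usage sketch for MatMPIAIJSetPreallocation(), using the per-row counts worked out above for proc0 of the
   8x8 example (each process would pass its own local sizes and counts); everything here is illustrative only.

     Mat      A;
     PetscInt d_nnz[] = {2,2,2};   // nonzeros per local row in the DIAGONAL block on proc0
     PetscInt o_nnz[] = {2,2,2};   // nonzeros per local row in the OFF-DIAGONAL block on proc0

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);              // local 3x3 on proc0, global 8x8
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
*/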
4012 
4013 /*@
4014      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
4015          CSR format, the local rows.
4016 
4017    Collective
4018 
4019    Input Parameters:
4020 +  comm - MPI communicator
4021 .  m - number of local rows (Cannot be PETSC_DECIDE)
4022 .  n - This value should be the same as the local size used in creating the
4023        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4024        calculated if N is given). For square matrices n is almost always m.
4025 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4026 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4027 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4028 .   j - column indices
4029 -   a - matrix values
4030 
4031    Output Parameter:
4032 .   mat - the matrix
4033 
4034    Level: intermediate
4035 
4036    Notes:
4037        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4038      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4039      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4040 
4041        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4042 
4043        The format used for the sparse matrix input is equivalent to a
4044     row-major ordering, i.e., for the following matrix, the input data expected is
4045     as shown below.
4046 
4047        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4048 
4049 $        1 0 0
4050 $        2 0 3     P0
4051 $       -------
4052 $        4 5 6     P1
4053 $
4054 $     Process0 [P0]: rows_owned=[0,1]
4055 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4056 $        j =  {0,0,2}  [size = 3]
4057 $        v =  {1,2,3}  [size = 3]
4058 $
4059 $     Process1 [P1]: rows_owned=[2]
4060 $        i =  {0,3}    [size = nrow+1  = 1+1]
4061 $        j =  {0,1,2}  [size = 3]
4062 $        v =  {4,5,6}  [size = 3]
4063 
4064 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4065           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4066 @*/
4067 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4068 {
4069   PetscErrorCode ierr;
4070 
4071   PetscFunctionBegin;
4072   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4073   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4074   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4075   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4076   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4077   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4078   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4079   PetscFunctionReturn(0);
4080 }
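
/*
   A sketch of building the same 3x3 example in a single call with MatCreateMPIAIJWithArrays(); each process
   passes its own local CSR arrays (shown here for process 0), and all names are illustrative only.

     Mat         A;
     PetscInt    i[] = {0,1,3},  j[] = {0,0,2};      // local CSR structure on process 0
     PetscScalar a[] = {1.0,2.0,3.0};

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
     // i, j, and a were copied, so they may be freed or reused right away
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/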
4081 
4082 /*@
4083      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain, in standard
4084          CSR format, the local rows. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4085 
4086    Collective
4087 
4088    Input Parameters:
4089 +  mat - the matrix
4090 .  m - number of local rows (Cannot be PETSC_DECIDE)
4091 .  n - This value should be the same as the local size used in creating the
4092        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4093        calculated if N is given). For square matrices n is almost always m.
4094 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4095 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4096 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4097 .  J - column indices
4098 -  v - matrix values
4099 
4100    Level: intermediate
4101 
4102 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4103           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4104 @*/
4105 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4106 {
4107   PetscErrorCode ierr;
4108   PetscInt       cstart,nnz,i,j;
4109   PetscInt       *ld;
4110   PetscBool      nooffprocentries;
4111   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4112   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4113   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4114   const PetscInt *Adi = Ad->i;
4115   PetscInt       ldi,Iii,md;
4116 
4117   PetscFunctionBegin;
4118   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4119   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4120   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4121   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4122 
4123   cstart = mat->cmap->rstart;
4124   if (!Aij->ld) {
4125     /* count number of entries below block diagonal */
4126     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4127     Aij->ld = ld;
4128     for (i=0; i<m; i++) {
4129       nnz  = Ii[i+1]- Ii[i];
4130       j     = 0;
4131       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] to avoid overrunning the row */
4132       J    += nnz;
4133       ld[i] = j;
4134     }
4135   } else {
4136     ld = Aij->ld;
4137   }
4138 
4139   for (i=0; i<m; i++) {
4140     nnz  = Ii[i+1]- Ii[i];
4141     Iii  = Ii[i];
4142     ldi  = ld[i];
4143     md   = Adi[i+1]-Adi[i];
4144     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4145     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4146     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4147     ad  += md;
4148     ao  += nnz - md;
4149   }
4150   nooffprocentries      = mat->nooffprocentries;
4151   mat->nooffprocentries = PETSC_TRUE;
4152   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4153   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4154   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4155   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4156   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4157   mat->nooffprocentries = nooffprocentries;
4158   PetscFunctionReturn(0);
4159 }
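
/*
   A sketch showing MatUpdateMPIAIJWithArrays() refreshing only the numerical values of a matrix created with
   MatCreateMPIAIJWithArrays(); m,n,M,N,i,j,v,vnew are placeholders for the caller's own data.

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,M,N,i,j,v,&A);CHKERRQ(ierr);
     // ... later: same i and j structure, but the values have changed to vnew ...
     ierr = MatUpdateMPIAIJWithArrays(A,m,n,M,N,i,j,vnew);CHKERRQ(ierr);
*/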
4160 
4161 /*@C
4162    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4163    (the default parallel PETSc format).  For good matrix assembly performance
4164    the user should preallocate the matrix storage by setting the parameters
4165    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4166    performance can be increased by more than a factor of 50.
4167 
4168    Collective
4169 
4170    Input Parameters:
4171 +  comm - MPI communicator
4172 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4173            This value should be the same as the local size used in creating the
4174            y vector for the matrix-vector product y = Ax.
4175 .  n - This value should be the same as the local size used in creating the
4176        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4177        calculated if N is given) For square matrices n is almost always m.
4178 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4179 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4180 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4181            (same value is used for all local rows)
4182 .  d_nnz - array containing the number of nonzeros in the various rows of the
4183            DIAGONAL portion of the local submatrix (possibly different for each row)
4184            or NULL, if d_nz is used to specify the nonzero structure.
4185            The size of this array is equal to the number of local rows, i.e 'm'.
4186 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4187            submatrix (same value is used for all local rows).
4188 -  o_nnz - array containing the number of nonzeros in the various rows of the
4189            OFF-DIAGONAL portion of the local submatrix (possibly different for
4190            each row) or NULL, if o_nz is used to specify the nonzero
4191            structure. The size of this array is equal to the number
4192            of local rows, i.e 'm'.
4193 
4194    Output Parameter:
4195 .  A - the matrix
4196 
4197    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4198    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4199    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4200 
4201    Notes:
4202    If the *_nnz parameter is given then the *_nz parameter is ignored
4203 
4204    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4205    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4206    storage requirements for this matrix.
4207 
4208    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4209    processor then it must be used on all processors that share the object for
4210    that argument.
4211 
4212    The user MUST specify either the local or global matrix dimensions
4213    (possibly both).
4214 
4215    The parallel matrix is partitioned across processors such that the
4216    first m0 rows belong to process 0, the next m1 rows belong to
4217    process 1, the next m2 rows belong to process 2, etc., where
4218    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4219    values corresponding to an [m x N] submatrix.
4220 
4221    The columns are logically partitioned with the n0 columns belonging
4222    to the 0th partition, the next n1 columns belonging to the next
4223    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4224 
4225    The DIAGONAL portion of the local submatrix on any given processor
4226    is the submatrix corresponding to the m rows and n columns owned by
4227    the given processor, i.e., the diagonal matrix on
4228    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4229    etc. The remaining portion of the local submatrix [m x (N-n)]
4230    constitutes the OFF-DIAGONAL portion. The example below better
4231    illustrates this concept.
4232 
4233    For a square global matrix we define each processor's diagonal portion
4234    to be its local rows and the corresponding columns (a square submatrix);
4235    each processor's off-diagonal portion encompasses the remainder of the
4236    local matrix (a rectangular submatrix).
4237 
4238    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4239 
4240    When calling this routine with a single process communicator, a matrix of
4241    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4242    type of communicator, use the construction mechanism
4243 .vb
4244      MatCreate(...,&A);
4245      MatSetType(A,MATMPIAIJ);
4246      MatSetSizes(A, m,n,M,N);
4247      MatMPIAIJSetPreallocation(A,...);
4248 .ve
4251 
4252    By default, this format uses inodes (identical nodes) when possible.
4253    We search for consecutive rows with the same nonzero structure, thereby
4254    reusing matrix information to achieve increased efficiency.
4255 
4256    Options Database Keys:
4257 +  -mat_no_inode  - Do not use inodes
4258 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4259 
4262    Example usage:
4263 
4264    Consider the following 8x8 matrix with 34 non-zero values that is
4265    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4266    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4267    as follows
4268 
4269 .vb
4270             1  2  0  |  0  3  0  |  0  4
4271     Proc0   0  5  6  |  7  0  0  |  8  0
4272             9  0 10  | 11  0  0  | 12  0
4273     -------------------------------------
4274            13  0 14  | 15 16 17  |  0  0
4275     Proc1   0 18  0  | 19 20 21  |  0  0
4276             0  0  0  | 22 23  0  | 24  0
4277     -------------------------------------
4278     Proc2  25 26 27  |  0  0 28  | 29  0
4279            30  0  0  | 31 32 33  |  0 34
4280 .ve
4281 
4282    This can be represented as a collection of submatrices as
4283 
4284 .vb
4285       A B C
4286       D E F
4287       G H I
4288 .ve
4289 
4290    Where the submatrices A,B,C are owned by proc0, D,E,F are
4291    owned by proc1, G,H,I are owned by proc2.
4292 
4293    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4294    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4295    The 'M','N' parameters are 8,8, and have the same values on all procs.
4296 
4297    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4298    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4299    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4300    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4301    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4302    matrix, and [DF] as another SeqAIJ matrix.
4303 
4304    When d_nz, o_nz parameters are specified, d_nz storage elements are
4305    allocated for every row of the local diagonal submatrix, and o_nz
4306    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4307    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4308    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4309    In this case, the values of d_nz,o_nz are
4310 .vb
4311      proc0 : dnz = 2, o_nz = 2
4312      proc1 : dnz = 3, o_nz = 2
4313      proc2 : dnz = 1, o_nz = 4
4314 .ve
4315    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4316    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4317    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4318    34 values.
4319 
4320    When d_nnz, o_nnz parameters are specified, the storage is specified
4321    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4322    In the above case the values for d_nnz,o_nnz are
4323 .vb
4324      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4325      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4326      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4327 .ve
4328    Here the space allocated is the sum of all the above values, i.e., 34, and
4329    hence the preallocation is perfect.
4330 
4331    Level: intermediate
4332 
4333 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4334           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4335 @*/
4336 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4337 {
4338   PetscErrorCode ierr;
4339   PetscMPIInt    size;
4340 
4341   PetscFunctionBegin;
4342   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4343   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4344   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4345   if (size > 1) {
4346     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4347     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4348   } else {
4349     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4350     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4351   }
4352   PetscFunctionReturn(0);
4353 }
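
/*
   A sketch of the one-call creation path with MatCreateAIJ(), using the d_nz/o_nz upper bounds worked out
   above for proc0 of the 8x8 example (each process passes its own local sizes); values are illustrative only.

     Mat A;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);CHKERRQ(ierr);
     // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   As recommended above, the MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence is usually
   preferred over calling this routine directly.
*/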
4354 
4355 /*@C
4356   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4357 
4358   Not collective
4359 
4360   Input Parameter:
4361 . A - The MPIAIJ matrix
4362 
4363   Output Parameters:
4364 + Ad - The local diagonal block as a SeqAIJ matrix
4365 . Ao - The local off-diagonal block as a SeqAIJ matrix
4366 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4367 
4368   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4369   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4370   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4371   local column numbers to global column numbers in the original matrix.
4372 
4373   Level: intermediate
4374 
4375 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4376 @*/
4377 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4378 {
4379   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4380   PetscBool      flg;
4381   PetscErrorCode ierr;
4382 
4383   PetscFunctionBegin;
4384   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4385   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4386   if (Ad)     *Ad     = a->A;
4387   if (Ao)     *Ao     = a->B;
4388   if (colmap) *colmap = a->garray;
4389   PetscFunctionReturn(0);
4390 }
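
/*
   A sketch of inspecting the two local blocks returned by MatMPIAIJGetSeqAIJ() for an existing MATMPIAIJ
   matrix A; the variable names are illustrative only.

     Mat            Ad,Ao;
     const PetscInt *colmap;
     PetscInt       nco;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     ierr = MatGetSize(Ao,NULL,&nco);CHKERRQ(ierr);   // number of nonzero off-process columns
     // colmap[c] is the global column index of local column c of Ao, for c in [0,nco)
     // Ad and Ao are borrowed references and must not be destroyed by the caller
*/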
4391 
4392 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4393 {
4394   PetscErrorCode ierr;
4395   PetscInt       m,N,i,rstart,nnz,Ii;
4396   PetscInt       *indx;
4397   PetscScalar    *values;
4398 
4399   PetscFunctionBegin;
4400   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4401   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4402     PetscInt       *dnz,*onz,sum,bs,cbs;
4403 
4404     if (n == PETSC_DECIDE) {
4405       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4406     }
4407     /* Check sum(n) = N */
4408     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4409     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4410 
4411     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4412     rstart -= m;
4413 
4414     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4415     for (i=0; i<m; i++) {
4416       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4417       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4418       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4419     }
4420 
4421     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4422     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4423     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4424     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4425     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4426     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4427     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4428     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4429   }
4430 
4431   /* numeric phase */
4432   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4433   for (i=0; i<m; i++) {
4434     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4435     Ii   = i + rstart;
4436     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4437     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4438   }
4439   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4440   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4441   PetscFunctionReturn(0);
4442 }
4443 
4444 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4445 {
4446   PetscErrorCode    ierr;
4447   PetscMPIInt       rank;
4448   PetscInt          m,N,i,rstart,nnz;
4449   size_t            len;
4450   const PetscInt    *indx;
4451   PetscViewer       out;
4452   char              *name;
4453   Mat               B;
4454   const PetscScalar *values;
4455 
4456   PetscFunctionBegin;
4457   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4458   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4459   /* Should this be the type of the diagonal block of A? */
4460   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4461   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4462   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4463   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4464   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4465   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4466   for (i=0; i<m; i++) {
4467     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4468     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4469     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4470   }
4471   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4472   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4473 
4474   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4475   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4476   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr);  /* room for ".<rank>" and the terminating null even on large communicators */
4477   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
4478   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4479   ierr = PetscFree(name);CHKERRQ(ierr);
4480   ierr = MatView(B,out);CHKERRQ(ierr);
4481   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4482   ierr = MatDestroy(&B);CHKERRQ(ierr);
4483   PetscFunctionReturn(0);
4484 }
4485 
4486 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4487 {
4488   PetscErrorCode      ierr;
4489   Mat_Merge_SeqsToMPI *merge;
4490   PetscContainer      container;
4491 
4492   PetscFunctionBegin;
4493   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4494   if (container) {
4495     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4505     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4506     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4507     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4508     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4509     ierr = PetscFree(merge);CHKERRQ(ierr);
4510     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4511   }
4512   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4513   PetscFunctionReturn(0);
4514 }
4515 
4516 #include <../src/mat/utils/freespace.h>
4517 #include <petscbt.h>
4518 
4519 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4520 {
4521   PetscErrorCode      ierr;
4522   MPI_Comm            comm;
4523   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4524   PetscMPIInt         size,rank,taga,*len_s;
4525   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4526   PetscInt            proc,m;
4527   PetscInt            **buf_ri,**buf_rj;
4528   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4529   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4530   MPI_Request         *s_waits,*r_waits;
4531   MPI_Status          *status;
4532   MatScalar           *aa=a->a;
4533   MatScalar           **abuf_r,*ba_i;
4534   Mat_Merge_SeqsToMPI *merge;
4535   PetscContainer      container;
4536 
4537   PetscFunctionBegin;
4538   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4539   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4540 
4541   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4542   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4543 
4544   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4545   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4546 
4547   bi     = merge->bi;
4548   bj     = merge->bj;
4549   buf_ri = merge->buf_ri;
4550   buf_rj = merge->buf_rj;
4551 
4552   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4553   owners = merge->rowmap->range;
4554   len_s  = merge->len_s;
4555 
4556   /* send and recv matrix values */
4557   /*-----------------------------*/
4558   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4559   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4560 
4561   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4562   for (proc=0,k=0; proc<size; proc++) {
4563     if (!len_s[proc]) continue;
4564     i    = owners[proc];
4565     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4566     k++;
4567   }
4568 
4569   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4570   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4571   ierr = PetscFree(status);CHKERRQ(ierr);
4572 
4573   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4574   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4575 
4576   /* insert mat values of mpimat */
4577   /*----------------------------*/
4578   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4579   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4580 
4581   for (k=0; k<merge->nrecv; k++) {
4582     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4583     nrows       = *(buf_ri_k[k]);
4584     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4585     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4586   }
4587 
4588   /* set values of ba */
4589   m = merge->rowmap->n;
4590   for (i=0; i<m; i++) {
4591     arow = owners[rank] + i;
4592     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4593     bnzi = bi[i+1] - bi[i];
4594     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4595 
4596     /* add local non-zero vals of this proc's seqmat into ba */
4597     anzi   = ai[arow+1] - ai[arow];
4598     aj     = a->j + ai[arow];
4599     aa     = a->a + ai[arow];
4600     nextaj = 0;
4601     for (j=0; nextaj<anzi; j++) {
4602       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4603         ba_i[j] += aa[nextaj++];
4604       }
4605     }
4606 
4607     /* add received vals into ba */
4608     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4609       /* i-th row */
4610       if (i == *nextrow[k]) {
4611         anzi   = *(nextai[k]+1) - *nextai[k];
4612         aj     = buf_rj[k] + *(nextai[k]);
4613         aa     = abuf_r[k] + *(nextai[k]);
4614         nextaj = 0;
4615         for (j=0; nextaj<anzi; j++) {
4616           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4617             ba_i[j] += aa[nextaj++];
4618           }
4619         }
4620         nextrow[k]++; nextai[k]++;
4621       }
4622     }
4623     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4624   }
4625   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4626   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4627 
4628   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4629   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4630   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4631   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4632   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4633   PetscFunctionReturn(0);
4634 }
4635 
4636 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4637 {
4638   PetscErrorCode      ierr;
4639   Mat                 B_mpi;
4640   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4641   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4642   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4643   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4644   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4645   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4646   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4647   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4648   MPI_Status          *status;
4649   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4650   PetscBT             lnkbt;
4651   Mat_Merge_SeqsToMPI *merge;
4652   PetscContainer      container;
4653 
4654   PetscFunctionBegin;
4655   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4656 
4657   /* make sure it is a PETSc comm */
4658   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4659   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4660   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4661 
4662   ierr = PetscNew(&merge);CHKERRQ(ierr);
4663   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4664 
4665   /* determine row ownership */
4666   /*---------------------------------------------------------*/
4667   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4669   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4670   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4671   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4672   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4673   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4674 
4675   m      = merge->rowmap->n;
4676   owners = merge->rowmap->range;
4677 
4678   /* determine the number of messages to send, their lengths */
4679   /*---------------------------------------------------------*/
4680   len_s = merge->len_s;
4681 
4682   len          = 0; /* length of buf_si[] */
4683   merge->nsend = 0;
4684   for (proc=0; proc<size; proc++) {
4685     len_si[proc] = 0;
4686     if (proc == rank) {
4687       len_s[proc] = 0;
4688     } else {
4689       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4690       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4691     }
4692     if (len_s[proc]) {
4693       merge->nsend++;
4694       nrows = 0;
4695       for (i=owners[proc]; i<owners[proc+1]; i++) {
4696         if (ai[i+1] > ai[i]) nrows++;
4697       }
4698       len_si[proc] = 2*(nrows+1);
4699       len         += len_si[proc];
4700     }
4701   }
4702 
4703   /* determine the number and length of messages to receive for ij-structure */
4704   /*-------------------------------------------------------------------------*/
4705   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4706   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4707 
4708   /* post the Irecv of j-structure */
4709   /*-------------------------------*/
4710   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4711   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4712 
4713   /* post the Isend of j-structure */
4714   /*--------------------------------*/
4715   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4716 
4717   for (proc=0, k=0; proc<size; proc++) {
4718     if (!len_s[proc]) continue;
4719     i    = owners[proc];
4720     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4721     k++;
4722   }
4723 
4724   /* receives and sends of j-structure are complete */
4725   /*------------------------------------------------*/
4726   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4727   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4728 
4729   /* send and recv i-structure */
4730   /*---------------------------*/
4731   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4732   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4733 
4734   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4735   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4736   for (proc=0,k=0; proc<size; proc++) {
4737     if (!len_s[proc]) continue;
4738     /* form outgoing message for i-structure:
4739          buf_si[0]:                 nrows to be sent
4740                [1:nrows]:           row index (global)
4741                [nrows+1:2*nrows+1]: i-structure index
4742     */
4743     /*-------------------------------------------*/
4744     nrows       = len_si[proc]/2 - 1;
4745     buf_si_i    = buf_si + nrows+1;
4746     buf_si[0]   = nrows;
4747     buf_si_i[0] = 0;
4748     nrows       = 0;
4749     for (i=owners[proc]; i<owners[proc+1]; i++) {
4750       anzi = ai[i+1] - ai[i];
4751       if (anzi) {
4752         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4753         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4754         nrows++;
4755       }
4756     }
4757     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4758     k++;
4759     buf_si += len_si[proc];
4760   }
4761 
4762   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4763   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4764 
4765   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4766   for (i=0; i<merge->nrecv; i++) {
4767     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4768   }
4769 
4770   ierr = PetscFree(len_si);CHKERRQ(ierr);
4771   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4772   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4773   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4774   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4775   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4776   ierr = PetscFree(status);CHKERRQ(ierr);
4777 
4778   /* compute a local seq matrix in each processor */
4779   /*----------------------------------------------*/
4780   /* allocate bi array and free space for accumulating nonzero column info */
4781   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4782   bi[0] = 0;
4783 
4784   /* create and initialize a linked list */
4785   nlnk = N+1;
4786   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4787 
4788   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4789   len  = ai[owners[rank+1]] - ai[owners[rank]];
4790   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4791 
4792   current_space = free_space;
4793 
4794   /* determine symbolic info for each local row */
4795   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4796 
4797   for (k=0; k<merge->nrecv; k++) {
4798     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4799     nrows       = *buf_ri_k[k];
4800     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4801     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4802   }
4803 
4804   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4805   len  = 0;
4806   for (i=0; i<m; i++) {
4807     bnzi = 0;
4808     /* add local non-zero cols of this proc's seqmat into lnk */
4809     arow  = owners[rank] + i;
4810     anzi  = ai[arow+1] - ai[arow];
4811     aj    = a->j + ai[arow];
4812     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4813     bnzi += nlnk;
4814     /* add received col data into lnk */
4815     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4816       if (i == *nextrow[k]) { /* i-th row */
4817         anzi  = *(nextai[k]+1) - *nextai[k];
4818         aj    = buf_rj[k] + *nextai[k];
4819         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4820         bnzi += nlnk;
4821         nextrow[k]++; nextai[k]++;
4822       }
4823     }
4824     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4825 
4826     /* if free space is not available, make more free space */
4827     if (current_space->local_remaining<bnzi) {
4828       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4829       nspacedouble++;
4830     }
4831     /* copy data into free space, then initialize lnk */
4832     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4833     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4834 
4835     current_space->array           += bnzi;
4836     current_space->local_used      += bnzi;
4837     current_space->local_remaining -= bnzi;
4838 
4839     bi[i+1] = bi[i] + bnzi;
4840   }
4841 
4842   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4843 
4844   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4845   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4846   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4847 
4848   /* create symbolic parallel matrix B_mpi */
4849   /*---------------------------------------*/
4850   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4851   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4852   if (n==PETSC_DECIDE) {
4853     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4854   } else {
4855     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4856   }
4857   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4858   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4859   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4860   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4861   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4862 
4863   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4864   B_mpi->assembled    = PETSC_FALSE;
4865   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4866   merge->bi           = bi;
4867   merge->bj           = bj;
4868   merge->buf_ri       = buf_ri;
4869   merge->buf_rj       = buf_rj;
4870   merge->coi          = NULL;
4871   merge->coj          = NULL;
4872   merge->owners_co    = NULL;
4873 
4874   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4875 
4876   /* attach the supporting struct to B_mpi for reuse */
4877   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4878   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4879   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4880   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4881   *mpimat = B_mpi;
4882 
4883   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4884   PetscFunctionReturn(0);
4885 }
4886 
4887 /*@C
4888       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4889                  matrices from each processor
4890 
4891     Collective
4892 
4893    Input Parameters:
4894 +    comm - the communicator the parallel matrix will live on
4895 .    seqmat - the input sequential matrix (one per process)
4896 .    m - number of local rows (or PETSC_DECIDE)
4897 .    n - number of local columns (or PETSC_DECIDE)
4898 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4899 
4900    Output Parameter:
4901 .    mpimat - the parallel matrix generated
4902 
4903     Level: advanced
4904 
4905    Notes:
4906      The dimensions of the sequential matrix in each processor MUST be the same.
4907      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4908      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4909 @*/
4910 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4911 {
4912   PetscErrorCode ierr;
4913   PetscMPIInt    size;
4914 
4915   PetscFunctionBegin;
4916   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4917   if (size == 1) {
4918     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4919     if (scall == MAT_INITIAL_MATRIX) {
4920       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4921     } else {
4922       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4923     }
4924     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925     PetscFunctionReturn(0);
4926   }
4927   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4928   if (scall == MAT_INITIAL_MATRIX) {
4929     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4930   }
4931   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4932   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4933   PetscFunctionReturn(0);
4934 }
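
/*
   A sketch of summing per-process sequential contributions into a single parallel matrix with
   MatCreateMPIAIJSumSeqAIJ(); the sequential matrix is global-sized on every process, and the sizes,
   names, and preallocation are illustrative only.

     Mat seqmat,mpimat;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,maxnz,NULL,&seqmat);CHKERRQ(ierr);
     // ... each process inserts its own contributions into seqmat and assembles it ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     // later, with the same nonzero pattern but updated values in seqmat:
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/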
4935 
4936 /*@
4937      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4938           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4939           with MatGetSize().
4940 
4941     Not Collective
4942 
4943    Input Parameters:
4944 +    A - the matrix
4945 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4946 
4947    Output Parameter:
4948 .    A_loc - the local sequential matrix generated
4949 
4950     Level: developer
4951 
4952    Notes:
4953      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4954      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4955      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4956      modify the values of the returned A_loc.
4957 
4958 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4959 
4960 @*/
4961 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4962 {
4963   PetscErrorCode ierr;
4964   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4965   Mat_SeqAIJ     *mat,*a,*b;
4966   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4967   MatScalar      *aa,*ba,*cam;
4968   PetscScalar    *ca;
4969   PetscMPIInt    size;
4970   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4971   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4972   PetscBool      match;
4973 
4974   PetscFunctionBegin;
4975   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4976   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4977   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4978   if (size == 1) {
4979     if (scall == MAT_INITIAL_MATRIX) {
4980       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4981       *A_loc = mpimat->A;
4982     } else if (scall == MAT_REUSE_MATRIX) {
4983       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4984     }
4985     PetscFunctionReturn(0);
4986   }
4987 
4988   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4989   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4990   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4991   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4992   aa = a->a; ba = b->a;
4993   if (scall == MAT_INITIAL_MATRIX) {
4994     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4995     ci[0] = 0;
4996     for (i=0; i<am; i++) {
4997       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4998     }
4999     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5000     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5001     k    = 0;
5002     for (i=0; i<am; i++) {
5003       ncols_o = bi[i+1] - bi[i];
5004       ncols_d = ai[i+1] - ai[i];
5005       /* off-diagonal portion of A */
5006       for (jo=0; jo<ncols_o; jo++) {
5007         col = cmap[*bj];
5008         if (col >= cstart) break;
5009         cj[k]   = col; bj++;
5010         ca[k++] = *ba++;
5011       }
5012       /* diagonal portion of A */
5013       for (j=0; j<ncols_d; j++) {
5014         cj[k]   = cstart + *aj++;
5015         ca[k++] = *aa++;
5016       }
5017       /* off-diagonal portion of A */
5018       for (j=jo; j<ncols_o; j++) {
5019         cj[k]   = cmap[*bj++];
5020         ca[k++] = *ba++;
5021       }
5022     }
5023     /* put together the new matrix */
5024     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5025     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5026     /* Since these are PETSc arrays, change flags to free them as necessary. */
5027     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5028     mat->free_a  = PETSC_TRUE;
5029     mat->free_ij = PETSC_TRUE;
5030     mat->nonew   = 0;
5031   } else if (scall == MAT_REUSE_MATRIX) {
5032     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5033     ci = mat->i; cj = mat->j; cam = mat->a;
5034     for (i=0; i<am; i++) {
5035       /* off-diagonal portion of A */
5036       ncols_o = bi[i+1] - bi[i];
5037       for (jo=0; jo<ncols_o; jo++) {
5038         col = cmap[*bj];
5039         if (col >= cstart) break;
5040         *cam++ = *ba++; bj++;
5041       }
5042       /* diagonal portion of A */
5043       ncols_d = ai[i+1] - ai[i];
5044       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5045       /* off-diagonal portion of A */
5046       for (j=jo; j<ncols_o; j++) {
5047         *cam++ = *ba++; bj++;
5048       }
5049     }
5050   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5051   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5052   PetscFunctionReturn(0);
5053 }
5054 
5055 /*@C
5056      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5057 
5058     Not Collective
5059 
5060    Input Parameters:
5061 +    A - the matrix
5062 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5063 -    row, col - index sets of rows and columns to extract (or NULL)
5064 
5065    Output Parameter:
5066 .    A_loc - the local sequential matrix generated
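
   Notes:
     A small usage sketch (for illustration only; it assumes A is an assembled MATMPIAIJ matrix, ierr is a PetscErrorCode, and both
     index sets are passed as NULL so the routine constructs them):

       Mat A_cond;
       ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_cond);CHKERRQ(ierr);
       ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&A_cond);CHKERRQ(ierr);
       ierr = MatDestroy(&A_cond);CHKERRQ(ierr);

     The first call extracts the local rows restricted to the locally occurring nonzero columns; the second refreshes the values.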
5067 
5068     Level: developer
5069 
5070 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5071 
5072 @*/
5073 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5074 {
5075   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5076   PetscErrorCode ierr;
5077   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5078   IS             isrowa,iscola;
5079   Mat            *aloc;
5080   PetscBool      match;
5081 
5082   PetscFunctionBegin;
5083   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5084   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5085   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5086   if (!row) {
5087     start = A->rmap->rstart; end = A->rmap->rend;
5088     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5089   } else {
5090     isrowa = *row;
5091   }
5092   if (!col) {
5093     start = A->cmap->rstart;
5094     cmap  = a->garray;
5095     nzA   = a->A->cmap->n;
5096     nzB   = a->B->cmap->n;
5097     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5098     ncols = 0;
5099     for (i=0; i<nzB; i++) {
5100       if (cmap[i] < start) idx[ncols++] = cmap[i];
5101       else break;
5102     }
5103     imark = i;
5104     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5105     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5106     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5107   } else {
5108     iscola = *col;
5109   }
5110   if (scall != MAT_INITIAL_MATRIX) {
5111     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5112     aloc[0] = *A_loc;
5113   }
5114   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5115   if (!col) { /* attach global id of condensed columns */
5116     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5117   }
5118   *A_loc = aloc[0];
5119   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5120   if (!row) {
5121     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5122   }
5123   if (!col) {
5124     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5125   }
5126   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5127   PetscFunctionReturn(0);
5128 }
5129 
5130 /*
5131  * Destroy a mat that may be composed with PetscSF communication objects.
5132  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5133  * */
5134 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5135 {
5136   PetscSF          sf,osf;
5137   IS               map;
5138   PetscErrorCode   ierr;
5139 
5140   PetscFunctionBegin;
5141   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5142   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5143   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5144   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5145   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5146   ierr = ISDestroy(&map);CHKERRQ(ierr);
5147   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5148   PetscFunctionReturn(0);
5149 }
5150 
5151 /*
5152  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once that row is matched.
5153  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing depends
5154  * on a global size.
5155  * */
5156 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5157 {
5158   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5159   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5160   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5161   PetscMPIInt              owner;
5162   PetscSFNode              *iremote,*oiremote;
5163   const PetscInt           *lrowindices;
5164   PetscErrorCode           ierr;
5165   PetscSF                  sf,osf;
5166   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5167   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5168   MPI_Comm                 comm;
5169   ISLocalToGlobalMapping   mapping;
5170 
5171   PetscFunctionBegin;
5172   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5173   /* plocalsize is the number of roots
5174    * nrows is the number of leaves
5175    * */
5176   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5177   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5178   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5179   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5180   for (i=0;i<nrows;i++) {
5181     /* Find a remote index and an owner for a row
5182      * The row could be local or remote
5183      * */
5184     owner = 0;
5185     lidx  = 0;
5186     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5187     iremote[i].index = lidx;
5188     iremote[i].rank  = owner;
5189   }
5190   /* Create SF to communicate how many nonzero columns for each row */
5191   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5192   /* SF will figure out the number of nonzero columns for each row, and their
5193    * offsets
5194    * */
5195   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5196   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5197   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5198 
5199   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5200   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5201   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5202   roffsets[0] = 0;
5203   roffsets[1] = 0;
5204   for (i=0;i<plocalsize;i++) {
5205     /* diag */
5206     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5207     /* off diag */
5208     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5209     /* compute offsets so that we know the relative location of each row */
5210     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5211     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5212   }
5213   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5214   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5215   /* 'r' means root, and 'l' means leaf */
5216   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5217   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5218   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5219   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5220   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5221   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5222   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5223   dntotalcols = 0;
5224   ontotalcols = 0;
5225   ncol = 0;
5226   for (i=0;i<nrows;i++) {
5227     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5228     ncol = PetscMax(pnnz[i],ncol);
5229     /* diag */
5230     dntotalcols += nlcols[i*2+0];
5231     /* off diag */
5232     ontotalcols += nlcols[i*2+1];
5233   }
5234   /* We do not need to figure out the right number of columns
5235    * since all the calculations will be done by going through the raw data
5236    * */
5237   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5238   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5239   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5240   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5241   /* diag */
5242   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5243   /* off diag */
5244   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5245   /* diag */
5246   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5247   /* off diag */
5248   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5249   dntotalcols = 0;
5250   ontotalcols = 0;
5251   ntotalcols  = 0;
5252   for (i=0;i<nrows;i++) {
5253     owner = 0;
5254     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5255     /* Set iremote for diag matrix */
5256     for (j=0;j<nlcols[i*2+0];j++) {
5257       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5258       iremote[dntotalcols].rank    = owner;
5259       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5260       ilocal[dntotalcols++]        = ntotalcols++;
5261     }
5262     /* off diag */
5263     for (j=0;j<nlcols[i*2+1];j++) {
5264       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5265       oiremote[ontotalcols].rank    = owner;
5266       oilocal[ontotalcols++]        = ntotalcols++;
5267     }
5268   }
5269   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5270   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5271   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5272   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5273   /* P serves as roots and P_oth serves as leaves
5274    * Diag matrix
5275    * */
5276   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5277   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5278   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5279 
5280   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5281   /* Off diag */
5282   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5283   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5284   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5285   /* We operate on the matrix internal data to save memory */
5286   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5287   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5288   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5289   /* Convert to global indices for diag matrix */
5290   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5291   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5292   /* We want P_oth to store global indices */
5293   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5294   /* Use memory scalable approach */
5295   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5296   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5297   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5298   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5299   /* Convert back to local indices */
5300   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5301   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5302   nout = 0;
5303   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5304   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5305   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5306   /* Exchange values */
5307   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5308   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5309   /* Stop PETSc from shrinking memory */
5310   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5311   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5312   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5313   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5314   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5315   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5316   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5317   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5318   PetscFunctionReturn(0);
5319 }
5320 
5321 /*
5322  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A.
5323  * This supports MPIAIJ and MAIJ matrices.
5324  * */
5325 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5326 {
5327   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5328   Mat_SeqAIJ            *p_oth;
5329   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5330   IS                    rows,map;
5331   PetscHMapI            hamp;
5332   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5333   MPI_Comm              comm;
5334   PetscSF               sf,osf;
5335   PetscBool             has;
5336   PetscErrorCode        ierr;
5337 
5338   PetscFunctionBegin;
5339   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5340   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5341   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5342    *  and then create a submatrix (that often is an overlapping matrix)
5343    * */
5344   if (reuse==MAT_INITIAL_MATRIX) {
5345     /* Use a hash table to figure out unique keys */
5346     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5347     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5348     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5349     count = 0;
5350     /* Assume that a->garray is sorted, otherwise the following does not make sense */
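    /* e.g. with dof=2 and garray = {4,5,8}, the keys are {2,2,4} and mapping becomes {0,0,1} */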
5351     for (i=0;i<a->B->cmap->n;i++) {
5352       key  = a->garray[i]/dof;
5353       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5354       if (!has) {
5355         mapping[i] = count;
5356         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5357       } else {
5358         /* Current 'i' has the same value as the previous step */
5359         mapping[i] = count-1;
5360       }
5361     }
5362     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5363     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5364     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5365     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5366     off = 0;
5367     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5368     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5369     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5370     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5371     /* In case the matrix was already created and the user wants to recreate it */
5372     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5373     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5374     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5375     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5376   } else if (reuse==MAT_REUSE_MATRIX) {
5377     /* If the matrix was already created, we simply update its values using the SF objects
5378      * that were attached to the matrix earlier.
5379      *  */
5380     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5381     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5382     if (!sf || !osf) {
5383       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5384     }
5385     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5386     /* Update values in place */
5387     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5388     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5389     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5390     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5391   } else {
5392     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5393   }
5394   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5395   PetscFunctionReturn(0);
5396 }
5397 
5398 /*@C
5399     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5400 
5401     Collective on Mat
5402 
5403    Input Parameters:
5404 +    A,B - the matrices in mpiaij format
5405 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5406 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5407 
5408    Output Parameters:
5409 +    rowb, colb - index sets of rows and columns of B to extract
5410 -    B_seq - the sequential matrix generated
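
   Notes:
     A usage sketch (for illustration only; it assumes A and B are assembled MATMPIAIJ matrices with compatible layouts and ierr is a PetscErrorCode):

       IS  rowb = NULL,colb = NULL;
       Mat B_seq;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
       ierr = ISDestroy(&rowb);CHKERRQ(ierr);
       ierr = ISDestroy(&colb);CHKERRQ(ierr);
       ierr = MatDestroy(&B_seq);CHKERRQ(ierr);

     The second call reuses the index sets returned by the first call to refresh the values of B_seq after B has changed.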
5411 
5412     Level: developer
5413 
5414 @*/
5415 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5416 {
5417   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5418   PetscErrorCode ierr;
5419   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5420   IS             isrowb,iscolb;
5421   Mat            *bseq=NULL;
5422 
5423   PetscFunctionBegin;
5424   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5425     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5426   }
5427   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5428 
5429   if (scall == MAT_INITIAL_MATRIX) {
5430     start = A->cmap->rstart;
5431     cmap  = a->garray;
5432     nzA   = a->A->cmap->n;
5433     nzB   = a->B->cmap->n;
5434     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5435     ncols = 0;
5436     for (i=0; i<nzB; i++) {  /* row < local row index */
5437       if (cmap[i] < start) idx[ncols++] = cmap[i];
5438       else break;
5439     }
5440     imark = i;
5441     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5442     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5443     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5444     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5445   } else {
5446     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5447     isrowb  = *rowb; iscolb = *colb;
5448     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5449     bseq[0] = *B_seq;
5450   }
5451   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5452   *B_seq = bseq[0];
5453   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5454   if (!rowb) {
5455     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5456   } else {
5457     *rowb = isrowb;
5458   }
5459   if (!colb) {
5460     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5461   } else {
5462     *colb = iscolb;
5463   }
5464   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5465   PetscFunctionReturn(0);
5466 }
5467 
5468 /*
5469     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5470     of the OFF-DIAGONAL portion of local A
5471 
5472     Collective on Mat
5473 
5474    Input Parameters:
5475 +    A,B - the matrices in mpiaij format
5476 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5477 
5478    Output Parameters:
5479 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5480 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5481 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5482 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5483 
5484     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5485      for this matrix. This is not desirable.
5486 
5487     Level: developer
5488 
5489 */
5490 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5491 {
5492   PetscErrorCode         ierr;
5493   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5494   Mat_SeqAIJ             *b_oth;
5495   VecScatter             ctx;
5496   MPI_Comm               comm;
5497   const PetscMPIInt      *rprocs,*sprocs;
5498   const PetscInt         *srow,*rstarts,*sstarts;
5499   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5500   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5501   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5502   MPI_Request            *rwaits = NULL,*swaits = NULL;
5503   MPI_Status             rstatus;
5504   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5505 
5506   PetscFunctionBegin;
5507   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5508   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5509 
5510   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5511     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5512   }
5513   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5514   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5515 
5516   if (size == 1) {
5517     startsj_s = NULL;
5518     bufa_ptr  = NULL;
5519     *B_oth    = NULL;
5520     PetscFunctionReturn(0);
5521   }
5522 
5523   ctx = a->Mvctx;
5524   tag = ((PetscObject)ctx)->tag;
5525 
5526   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5527   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5528   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5529   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5530   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5531   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5532   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5533 
5534   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5535   if (scall == MAT_INITIAL_MATRIX) {
5536     /* i-array */
5537     /*---------*/
5538     /*  post receives */
5539     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5540     for (i=0; i<nrecvs; i++) {
5541       rowlen = rvalues + rstarts[i]*rbs;
5542       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5543       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5544     }
5545 
5546     /* pack the outgoing message */
5547     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5548 
5549     sstartsj[0] = 0;
5550     rstartsj[0] = 0;
5551     len         = 0; /* total length of j or a array to be sent */
5552     if (nsends) {
5553       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5554       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5555     }
5556     for (i=0; i<nsends; i++) {
5557       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5558       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5559       for (j=0; j<nrows; j++) {
5560         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5561         for (l=0; l<sbs; l++) {
5562           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5563 
5564           rowlen[j*sbs+l] = ncols;
5565 
5566           len += ncols;
5567           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5568         }
5569         k++;
5570       }
5571       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5572 
5573       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5574     }
5575     /* recvs and sends of i-array are completed */
5576     i = nrecvs;
5577     while (i--) {
5578       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5579     }
5580     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5581     ierr = PetscFree(svalues);CHKERRQ(ierr);
5582 
5583     /* allocate buffers for sending j and a arrays */
5584     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5585     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5586 
5587     /* create i-array of B_oth */
5588     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5589 
5590     b_othi[0] = 0;
5591     len       = 0; /* total length of j or a array to be received */
5592     k         = 0;
5593     for (i=0; i<nrecvs; i++) {
5594       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5595       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5596       for (j=0; j<nrows; j++) {
5597         b_othi[k+1] = b_othi[k] + rowlen[j];
5598         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5599         k++;
5600       }
5601       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5602     }
5603     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5604 
5605     /* allocate space for j and a arrays of B_oth */
5606     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5607     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5608 
5609     /* j-array */
5610     /*---------*/
5611     /*  post receives of j-array */
5612     for (i=0; i<nrecvs; i++) {
5613       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5614       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5615     }
5616 
5617     /* pack the outgoing message j-array */
5618     if (nsends) k = sstarts[0];
5619     for (i=0; i<nsends; i++) {
5620       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5621       bufJ  = bufj+sstartsj[i];
5622       for (j=0; j<nrows; j++) {
5623         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5624         for (ll=0; ll<sbs; ll++) {
5625           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5626           for (l=0; l<ncols; l++) {
5627             *bufJ++ = cols[l];
5628           }
5629           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5630         }
5631       }
5632       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5633     }
5634 
5635     /* recvs and sends of j-array are completed */
5636     i = nrecvs;
5637     while (i--) {
5638       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5639     }
5640     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5641   } else if (scall == MAT_REUSE_MATRIX) {
5642     sstartsj = *startsj_s;
5643     rstartsj = *startsj_r;
5644     bufa     = *bufa_ptr;
5645     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5646     b_otha   = b_oth->a;
5647   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5648 
5649   /* a-array */
5650   /*---------*/
5651   /*  post receives of a-array */
5652   for (i=0; i<nrecvs; i++) {
5653     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5654     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5655   }
5656 
5657   /* pack the outgoing message a-array */
5658   if (nsends) k = sstarts[0];
5659   for (i=0; i<nsends; i++) {
5660     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5661     bufA  = bufa+sstartsj[i];
5662     for (j=0; j<nrows; j++) {
5663       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5664       for (ll=0; ll<sbs; ll++) {
5665         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5666         for (l=0; l<ncols; l++) {
5667           *bufA++ = vals[l];
5668         }
5669         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5670       }
5671     }
5672     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5673   }
5674   /* recvs and sends of a-array are completed */
5675   i = nrecvs;
5676   while (i--) {
5677     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5678   }
5679   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5680   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5681 
5682   if (scall == MAT_INITIAL_MATRIX) {
5683     /* put together the new matrix */
5684     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5685 
5686     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5687     /* Since these are PETSc arrays, change flags to free them as necessary. */
5688     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5689     b_oth->free_a  = PETSC_TRUE;
5690     b_oth->free_ij = PETSC_TRUE;
5691     b_oth->nonew   = 0;
5692 
5693     ierr = PetscFree(bufj);CHKERRQ(ierr);
5694     if (!startsj_s || !bufa_ptr) {
5695       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5696       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5697     } else {
5698       *startsj_s = sstartsj;
5699       *startsj_r = rstartsj;
5700       *bufa_ptr  = bufa;
5701     }
5702   }
5703 
5704   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5705   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5706   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5707   PetscFunctionReturn(0);
5708 }
5709 
5710 /*@C
5711   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5712 
5713   Not Collective
5714 
5715   Input Parameter:
5716 . A - The matrix in mpiaij format
5717 
5718   Output Parameters:
5719 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5720 . colmap - A map from global column index to local index into lvec
5721 - multScatter - A scatter from the argument of a matrix-vector product to lvec
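
  Notes:
   The returned objects are owned by the matrix and must not be destroyed by the caller. A usage sketch (for illustration only;
   it assumes A is a MATMPIAIJ matrix and ierr is a PetscErrorCode):

     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);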
5722 
5723   Level: developer
5724 
5725 @*/
5726 #if defined(PETSC_USE_CTABLE)
5727 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5728 #else
5729 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5730 #endif
5731 {
5732   Mat_MPIAIJ *a;
5733 
5734   PetscFunctionBegin;
5735   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5736   PetscValidPointer(lvec, 2);
5737   PetscValidPointer(colmap, 3);
5738   PetscValidPointer(multScatter, 4);
5739   a = (Mat_MPIAIJ*) A->data;
5740   if (lvec) *lvec = a->lvec;
5741   if (colmap) *colmap = a->colmap;
5742   if (multScatter) *multScatter = a->Mvctx;
5743   PetscFunctionReturn(0);
5744 }
5745 
5746 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5747 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5748 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5749 #if defined(PETSC_HAVE_MKL_SPARSE)
5750 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5751 #endif
5752 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5753 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5754 #if defined(PETSC_HAVE_ELEMENTAL)
5755 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5756 #endif
5757 #if defined(PETSC_HAVE_HYPRE)
5758 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5759 #endif
5760 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5761 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5762 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5763 
5764 /*
5765     Computes C = A*B as (B'*A')', since computing the MPIDense times MPIAIJ product A*B directly is untenable
5766 
5767                n                       p                          p
5768         (              )       (              )         (                  )
5769       m (      A       )  *  n (       B      )   =   m (         C        )
5770         (              )       (              )         (                  )
5771 
5772 */
5773 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5774 {
5775   PetscErrorCode ierr;
5776   Mat            At,Bt,Ct;
5777 
5778   PetscFunctionBegin;
5779   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5780   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5781   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5782   ierr = MatDestroy(&At);CHKERRQ(ierr);
5783   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5784   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5785   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5786   PetscFunctionReturn(0);
5787 }
5788 
5789 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5790 {
5791   PetscErrorCode ierr;
5792   PetscInt       m=A->rmap->n,n=B->cmap->n;
5793 
5794   PetscFunctionBegin;
5795   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5796   ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5797   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5798   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
5799   ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
5800   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5801   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5802 
5803   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5804   PetscFunctionReturn(0);
5805 }
5806 
5807 /* ----------------------------------------------------------------*/
5808 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5809 {
5810   Mat_Product *product = C->product;
5811   Mat         A = product->A,B=product->B;
5812 
5813   PetscFunctionBegin;
5814   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5815     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5816 
5817   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5818   C->ops->productsymbolic = MatProductSymbolic_AB;
5819   PetscFunctionReturn(0);
5820 }
5821 
5822 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5823 {
5824   PetscErrorCode ierr;
5825   Mat_Product    *product = C->product;
5826 
5827   PetscFunctionBegin;
5828   if (product->type == MATPRODUCT_AB) {
5829     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5830   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
5831   PetscFunctionReturn(0);
5832 }
5833 /* ----------------------------------------------------------------*/
5834 
5835 /*MC
5836    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5837 
5838    Options Database Keys:
5839 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5840 
5841    Level: beginner
5842 
5843    Notes:
5844     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5845     in this case the values associated with the rows and columns one passes in are set to zero
5846     in the matrix.
5847 
5848     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5849     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
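
    For example, one may insert just the nonzero pattern of a locally owned row without supplying values (a sketch; it assumes A has type MATMPIAIJ, row 0 is owned by this process, and ierr is a PetscErrorCode):

       PetscInt row = 0,cols[3] = {0,1,2};
       ierr = MatSetValues(A,1,&row,3,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);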
5850 
5851 .seealso: MatCreateAIJ()
5852 M*/
5853 
5854 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5855 {
5856   Mat_MPIAIJ     *b;
5857   PetscErrorCode ierr;
5858   PetscMPIInt    size;
5859 
5860   PetscFunctionBegin;
5861   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5862 
5863   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5864   B->data       = (void*)b;
5865   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5866   B->assembled  = PETSC_FALSE;
5867   B->insertmode = NOT_SET_VALUES;
5868   b->size       = size;
5869 
5870   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5871 
5872   /* build cache for off array entries formed */
5873   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5874 
5875   b->donotstash  = PETSC_FALSE;
5876   b->colmap      = 0;
5877   b->garray      = 0;
5878   b->roworiented = PETSC_TRUE;
5879 
5880   /* stuff used for matrix vector multiply */
5881   b->lvec  = NULL;
5882   b->Mvctx = NULL;
5883 
5884   /* stuff for MatGetRow() */
5885   b->rowindices   = 0;
5886   b->rowvalues    = 0;
5887   b->getrowactive = PETSC_FALSE;
5888 
5889   /* flexible pointer used in CUSP/CUSPARSE classes */
5890   b->spptr = NULL;
5891 
5892   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5893   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5894   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5896   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5897   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5898   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5899   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5900   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5901   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5902 #if defined(PETSC_HAVE_MKL_SPARSE)
5903   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5904 #endif
5905   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5906   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5907   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5908 #if defined(PETSC_HAVE_ELEMENTAL)
5909   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5910 #endif
5911   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5912   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5913   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5914   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5915 #if defined(PETSC_HAVE_HYPRE)
5916   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5917   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5918 #endif
5919   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5920   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5921   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5922   PetscFunctionReturn(0);
5923 }
5924 
5925 /*@C
5926      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5927          and "off-diagonal" part of the matrix in CSR format.
5928 
5929    Collective
5930 
5931    Input Parameters:
5932 +  comm - MPI communicator
5933 .  m - number of local rows (Cannot be PETSC_DECIDE)
5934 .  n - This value should be the same as the local size used in creating the
5935        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5936        it calculated if N is given). For square matrices n is almost always m.
5937 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5938 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5939 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5940 .   j - column indices
5941 .   a - matrix values
5942 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5943 .   oj - column indices
5944 -   oa - matrix values
5945 
5946    Output Parameter:
5947 .   mat - the matrix
5948 
5949    Level: advanced
5950 
5951    Notes:
5952        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5953        must free the arrays once the matrix has been destroyed and not before.
5954 
5955        The i and j indices are 0 based
5956 
5957        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5958 
5959        This sets local rows and cannot be used to set off-processor values.
5960 
5961        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5962        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5963        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5964        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5965        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5966        communication if it is known that only local entries will be set.
5967 
5968 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5969           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5970 @*/
5971 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5972 {
5973   PetscErrorCode ierr;
5974   Mat_MPIAIJ     *maij;
5975 
5976   PetscFunctionBegin;
5977   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5978   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5979   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5980   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5981   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5982   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5983   maij = (Mat_MPIAIJ*) (*mat)->data;
5984 
5985   (*mat)->preallocated = PETSC_TRUE;
5986 
5987   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5988   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5989 
5990   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5991   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5992 
5993   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5994   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5995   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5996   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5997 
5998   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5999   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6000   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6001   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6002   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6003   PetscFunctionReturn(0);
6004 }
6005 
6006 /*
6007     Special version for direct calls from Fortran
6008 */
6009 #include <petsc/private/fortranimpl.h>
6010 
6011 /* Change these macros so they can be used in a void function */
6012 #undef CHKERRQ
6013 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6014 #undef SETERRQ2
6015 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6016 #undef SETERRQ3
6017 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6018 #undef SETERRQ
6019 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6020 
6021 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6022 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6023 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6024 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6025 #else
6026 #endif
6027 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6028 {
6029   Mat            mat  = *mmat;
6030   PetscInt       m    = *mm, n = *mn;
6031   InsertMode     addv = *maddv;
6032   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6033   PetscScalar    value;
6034   PetscErrorCode ierr;
6035 
6036   MatCheckPreallocated(mat,1);
6037   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6038 
6039 #if defined(PETSC_USE_DEBUG)
6040   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6041 #endif
6042   {
6043     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6044     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6045     PetscBool roworiented = aij->roworiented;
6046 
6047     /* Some Variables required in the macro */
6048     Mat        A                    = aij->A;
6049     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6050     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6051     MatScalar  *aa                  = a->a;
6052     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6053     Mat        B                    = aij->B;
6054     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6055     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6056     MatScalar  *ba                  = b->a;
6057     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6058      * cannot use "#if defined" inside a macro. */
6059     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6060 
6061     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6062     PetscInt  nonew = a->nonew;
6063     MatScalar *ap1,*ap2;
6064 
6065     PetscFunctionBegin;
6066     for (i=0; i<m; i++) {
6067       if (im[i] < 0) continue;
6068 #if defined(PETSC_USE_DEBUG)
6069       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6070 #endif
6071       if (im[i] >= rstart && im[i] < rend) {
6072         row      = im[i] - rstart;
6073         lastcol1 = -1;
6074         rp1      = aj + ai[row];
6075         ap1      = aa + ai[row];
6076         rmax1    = aimax[row];
6077         nrow1    = ailen[row];
6078         low1     = 0;
6079         high1    = nrow1;
6080         lastcol2 = -1;
6081         rp2      = bj + bi[row];
6082         ap2      = ba + bi[row];
6083         rmax2    = bimax[row];
6084         nrow2    = bilen[row];
6085         low2     = 0;
6086         high2    = nrow2;
6087 
6088         for (j=0; j<n; j++) {
6089           if (roworiented) value = v[i*n+j];
6090           else value = v[i+j*m];
6091           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6092           if (in[j] >= cstart && in[j] < cend) {
6093             col = in[j] - cstart;
6094             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6095 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6096             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6097 #endif
6098           } else if (in[j] < 0) continue;
6099 #if defined(PETSC_USE_DEBUG)
6100           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6101           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6102 #endif
6103           else {
6104             if (mat->was_assembled) {
6105               if (!aij->colmap) {
6106                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6107               }
6108 #if defined(PETSC_USE_CTABLE)
6109               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6110               col--;
6111 #else
6112               col = aij->colmap[in[j]] - 1;
6113 #endif
6114               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6115                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6116                 col  =  in[j];
6117                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6118                 B        = aij->B;
6119                 b        = (Mat_SeqAIJ*)B->data;
6120                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6121                 rp2      = bj + bi[row];
6122                 ap2      = ba + bi[row];
6123                 rmax2    = bimax[row];
6124                 nrow2    = bilen[row];
6125                 low2     = 0;
6126                 high2    = nrow2;
6127                 bm       = aij->B->rmap->n;
6128                 ba       = b->a;
6129                 inserted = PETSC_FALSE;
6130               }
6131             } else col = in[j];
6132             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6133 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6134             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6135 #endif
6136           }
6137         }
6138       } else if (!aij->donotstash) {
6139         if (roworiented) {
6140           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6141         } else {
6142           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6143         }
6144       }
6145     }
6146   }
6147   PetscFunctionReturnVoid();
6148 }
6149