xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a3fdcf43bac02a6759bb705cf4bb50368f8ea553)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
24    automatically switches over to use inodes when enough exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
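/*
   A minimal usage sketch for the above (not part of this file; assumes a standard
   PETSc program in which PetscInitialize() has been called and ierr/CHKERRQ() error
   handling is in scope, with illustrative global sizes M, N and nonzero counts).
   It follows the recommendation of calling both preallocation routines so the same
   code runs on one or many processes:

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); // global sizes M x N
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          // used when the communicator has one process
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   // used when it has several
      // ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/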
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
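/*
   As with MATAIJ above, a short sketch (illustrative, not from this file) of selecting
   this type from the options database rather than hard-coding it:

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);   // picks up -mat_type aijcrl at run time
*/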
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j <nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
218   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine the diagonal and off-diagonal entry counts for each row */
283       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine the diagonal and off-diagonal entry counts for each row */
316       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0 */
372       nz   = Ad->nz + Ao->nz;
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
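/*
   A hedged usage sketch for MatDistribute_MPIAIJ() above (illustrative; gmat is a
   square SeqAIJ matrix held on rank 0 and is not referenced on the other ranks, and
   M is its global size). PetscSplitOwnership() gives each rank a concrete local row
   count m:

      Mat      dmat;
      PetscInt m = PETSC_DECIDE;
      ierr = PetscSplitOwnership(PETSC_COMM_WORLD,&m,&M);CHKERRQ(ierr);
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      // later, to move over only fresh numerical values from rank 0:
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/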
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
406 a slightly higher hash table cost; without it, it is not scalable (each process
407 has an order N integer array) but is fast to access.
408 */
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
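/*
   A small worked example of the colmap built above (illustrative): if this process
   has off-diagonal (ghost) columns garray = {3, 7, 9}, the map answers
   "global column -> local column in B, plus one":

      colmap[3] = 1, colmap[7] = 2, colmap[9] = 3, and 0 for any other column.

   The +1 shift lets 0 mean "column not present"; lookups subtract 1 afterwards,
   as MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below do.
*/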
429 
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) { \
444             ap1[_i] += value;   \
445             /* Not sure whether LogFlops will slow down the code or not */ \
446             (void)PetscLogFlops(1.0);   \
447            } \
448           else                    ap1[_i] = value; \
449           goto a_noinsert; \
450         } \
451       }  \
452       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
453       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
454       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
455       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
456       N = nrow1++ - 1; a->nz++; high1++; \
457       /* shift up all the later entries in this row */ \
458       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
459       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
460       rp1[_i] = col;  \
461       ap1[_i] = value;  \
462       A->nonzerostate++;\
463       a_noinsert: ; \
464       ailen[row] = nrow1; \
465 }
466 
467 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
468   { \
469     if (col <= lastcol2) low2 = 0;                        \
470     else high2 = nrow2;                                   \
471     lastcol2 = col;                                       \
472     while (high2-low2 > 5) {                              \
473       t = (low2+high2)/2;                                 \
474       if (rp2[t] > col) high2 = t;                        \
475       else             low2  = t;                         \
476     }                                                     \
477     for (_i=low2; _i<high2; _i++) {                       \
478       if (rp2[_i] > col) break;                           \
479       if (rp2[_i] == col) {                               \
480         if (addv == ADD_VALUES) {                         \
481           ap2[_i] += value;                               \
482           (void)PetscLogFlops(1.0);                       \
483         }                                                 \
484         else                    ap2[_i] = value;          \
485         goto b_noinsert;                                  \
486       }                                                   \
487     }                                                     \
488     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
489     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
490     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
491     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
492     N = nrow2++ - 1; b->nz++; high2++;                    \
493     /* shift up all the later entries in this row */      \
494     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
495     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
496     rp2[_i] = col;                                        \
497     ap2[_i] = value;                                      \
498     B->nonzerostate++;                                    \
499     b_noinsert: ;                                         \
500     bilen[row] = nrow2;                                   \
501   }
502 
503 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
504 {
505   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
506   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
507   PetscErrorCode ierr;
508   PetscInt       l,*garray = mat->garray,diag;
509 
510   PetscFunctionBegin;
511   /* code only works for square matrices A */
512 
513   /* find size of row to the left of the diagonal part */
514   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
515   row  = row - diag;
516   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
517     if (garray[b->j[b->i[row]+l]] > diag) break;
518   }
519   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
520 
521   /* diagonal part */
522   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
523 
524   /* right of diagonal part */
525   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
530 {
531   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
532   PetscScalar    value = 0.0;
533   PetscErrorCode ierr;
534   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
535   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
536   PetscBool      roworiented = aij->roworiented;
537 
538   /* Some variables required by the macros above */
539   Mat        A                 = aij->A;
540   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
541   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
542   MatScalar  *aa               = a->a;
543   PetscBool  ignorezeroentries = a->ignorezeroentries;
544   Mat        B                 = aij->B;
545   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
546   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
547   MatScalar  *ba               = b->a;
548 
549   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
550   PetscInt  nonew;
551   MatScalar *ap1,*ap2;
552 
553   PetscFunctionBegin;
554   for (i=0; i<m; i++) {
555     if (im[i] < 0) continue;
556 #if defined(PETSC_USE_DEBUG)
557     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
558 #endif
559     if (im[i] >= rstart && im[i] < rend) {
560       row      = im[i] - rstart;
561       lastcol1 = -1;
562       rp1      = aj + ai[row];
563       ap1      = aa + ai[row];
564       rmax1    = aimax[row];
565       nrow1    = ailen[row];
566       low1     = 0;
567       high1    = nrow1;
568       lastcol2 = -1;
569       rp2      = bj + bi[row];
570       ap2      = ba + bi[row];
571       rmax2    = bimax[row];
572       nrow2    = bilen[row];
573       low2     = 0;
574       high2    = nrow2;
575 
576       for (j=0; j<n; j++) {
577         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
578         if (in[j] >= cstart && in[j] < cend) {
579           col   = in[j] - cstart;
580           nonew = a->nonew;
581           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) {
614               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
615                 ierr = PetscInfo3(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
616               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
617             }
618           } else col = in[j];
619           nonew = b->nonew;
620           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
621         }
622       }
623     } else {
624       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
625       if (!aij->donotstash) {
626         mat->assembled = PETSC_FALSE;
627         if (roworiented) {
628           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
629         } else {
630           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
631         }
632       }
633     }
634   }
635   PetscFunctionReturn(0);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
640     The values in mat_i have to be sorted, and the values in mat_j have to be sorted within each row (CSR-like).
641     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
642 */
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
646   Mat            A           = aij->A; /* diagonal part of the matrix */
647   Mat            B           = aij->B; /* offdiagonal part of the matrix */
648   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
649   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
650   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
651   PetscInt       *ailen      = a->ilen,*aj = a->j;
652   PetscInt       *bilen      = b->ilen,*bj = b->j;
653   PetscInt       am          = aij->A->rmap->n,j;
654   PetscInt       diag_so_far = 0,dnz;
655   PetscInt       offd_so_far = 0,onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j=0; j<am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(0);
676 }
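/*
   A small worked example for the split above (illustrative): with cstart = 2,
   cend = 4 and a single local row given in CSR form as

      mat_i = {0, 4},  mat_j = {0, 2, 3, 5},

   global columns 2 and 3 lie in [cstart,cend) and go to the diagonal block with
   shifted indices aj = {0, 1} and ailen[0] = 2, while columns 0 and 5 go to the
   off-diagonal block as bj = {0, 5} with bilen[0] = 2 (kept as global indices at
   this stage).
*/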
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
680     The values in mat_i have to be sorted, and the values in mat_j have to be sorted within each row (CSR-like).
681     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
682     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be valid and the more complex MatSetValues_MPIAIJ() would have to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
688   Mat            A      = aij->A; /* diagonal part of the matrix */
689   Mat            B      = aij->B; /* offdiagonal part of the matrix */
690   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
691   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
692   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
693   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
694   PetscInt       *ailen = a->ilen,*aj = a->j;
695   PetscInt       *bilen = b->ilen,*bj = b->j;
696   PetscInt       am     = aij->A->rmap->n,j;
697   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
699   PetscScalar    *aa = a->a,*ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j=0; j<am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd = full_offd_i[j];
706     rowstart_diag = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag+dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd+onz_row] = mat_j[col];
716         ba[rowstart_offd+onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
727 {
728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
729   PetscErrorCode ierr;
730   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
731   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
732 
733   PetscFunctionBegin;
734   for (i=0; i<m; i++) {
735     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
736     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
737     if (idxm[i] >= rstart && idxm[i] < rend) {
738       row = idxm[i] - rstart;
739       for (j=0; j<n; j++) {
740         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
741         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
742         if (idxn[j] >= cstart && idxn[j] < cend) {
743           col  = idxn[j] - cstart;
744           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
745         } else {
746           if (!aij->colmap) {
747             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
748           }
749 #if defined(PETSC_USE_CTABLE)
750           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
751           col--;
752 #else
753           col = aij->colmap[idxn[j]] - 1;
754 #endif
755           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
756           else {
757             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
758           }
759         }
760       }
761     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
767 
768 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
769 {
770   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
771   PetscErrorCode ierr;
772   PetscInt       nstash,reallocs;
773 
774   PetscFunctionBegin;
775   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
776 
777   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
778   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
779   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782 
783 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
784 {
785   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
786   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
787   PetscErrorCode ierr;
788   PetscMPIInt    n;
789   PetscInt       i,j,rstart,ncols,flg;
790   PetscInt       *row,*col;
791   PetscBool      other_disassembled;
792   PetscScalar    *val;
793 
794   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
795 
796   PetscFunctionBegin;
797   if (!aij->donotstash && !mat->nooffprocentries) {
798     while (1) {
799       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
800       if (!flg) break;
801 
802       for (i=0; i<n; ) {
803         /* Now identify the consecutive vals belonging to the same row */
804         for (j=i,rstart=row[j]; j<n; j++) {
805           if (row[j] != rstart) break;
806         }
807         if (j < n) ncols = j-i;
808         else       ncols = n-i;
809         /* Now assemble all these values with a single function call */
810         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
811 
812         i = j;
813       }
814     }
815     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
816   }
817 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
818   if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU) aij->A->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
819 #endif
820   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
821   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
822 
823   /* determine if any process has disassembled; if so, we must
824      also disassemble ourselves so that we may reassemble. */
825   /*
826      if the nonzero structure of submatrix B cannot change, then we know that
827      no process disassembled, and thus we can skip this step
828   */
829   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
830     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
831     if (mat->was_assembled && !other_disassembled) {
832 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
833       aij->B->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
834 #endif
835       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
836     }
837   }
838   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
839     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
840   }
841   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
842 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
843   if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU && aij->B->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) aij->B->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
844 #endif
845   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
846   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
847 
848   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
849 
850   aij->rowvalues = 0;
851 
852   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
853   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
854 
855   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
856   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
857     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
858     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
859   }
860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
861   mat->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
862 #endif
863   PetscFunctionReturn(0);
864 }
865 
866 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
867 {
868   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
869   PetscErrorCode ierr;
870 
871   PetscFunctionBegin;
872   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
873   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
874   PetscFunctionReturn(0);
875 }
876 
877 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
878 {
879   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
880   PetscObjectState sA, sB;
881   PetscInt        *lrows;
882   PetscInt         r, len;
883   PetscBool        cong, lch, gch;
884   PetscErrorCode   ierr;
885 
886   PetscFunctionBegin;
887   /* get locally owned rows */
888   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
889   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
890   /* fix right hand side if needed */
891   if (x && b) {
892     const PetscScalar *xx;
893     PetscScalar       *bb;
894 
895     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
896     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
897     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
898     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
899     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
900     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
901   }
902 
903   sA = mat->A->nonzerostate;
904   sB = mat->B->nonzerostate;
905 
906   if (diag != 0.0 && cong) {
907     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
908     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
909   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
910     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
911     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
912     PetscInt   nnwA, nnwB;
913     PetscBool  nnzA, nnzB;
914 
915     nnwA = aijA->nonew;
916     nnwB = aijB->nonew;
917     nnzA = aijA->keepnonzeropattern;
918     nnzB = aijB->keepnonzeropattern;
919     if (!nnzA) {
920       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
921       aijA->nonew = 0;
922     }
923     if (!nnzB) {
924       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
925       aijB->nonew = 0;
926     }
927     /* Must zero here before the next loop */
928     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
929     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
930     for (r = 0; r < len; ++r) {
931       const PetscInt row = lrows[r] + A->rmap->rstart;
932       if (row >= A->cmap->N) continue;
933       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
934     }
935     aijA->nonew = nnwA;
936     aijB->nonew = nnwB;
937   } else {
938     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
939     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
940   }
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
943   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
944 
945   /* reduce nonzerostate */
946   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
947   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
948   if (gch) A->nonzerostate++;
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
953 {
954   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
955   PetscErrorCode    ierr;
956   PetscMPIInt       n = A->rmap->n;
957   PetscInt          i,j,r,m,p = 0,len = 0;
958   PetscInt          *lrows,*owners = A->rmap->range;
959   PetscSFNode       *rrows;
960   PetscSF           sf;
961   const PetscScalar *xx;
962   PetscScalar       *bb,*mask;
963   Vec               xmask,lmask;
964   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
965   const PetscInt    *aj, *ii,*ridx;
966   PetscScalar       *aa;
967 
968   PetscFunctionBegin;
969   /* Create SF where leaves are input rows and roots are owned rows */
970   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
971   for (r = 0; r < n; ++r) lrows[r] = -1;
972   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
973   for (r = 0; r < N; ++r) {
974     const PetscInt idx   = rows[r];
975     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
976     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
977       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
978     }
979     rrows[r].rank  = p;
980     rrows[r].index = rows[r] - owners[p];
981   }
982   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
983   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
984   /* Collect flags for rows to be zeroed */
985   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
986   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
987   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
988   /* Compress and put in row numbers */
989   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
990   /* zero diagonal part of matrix */
991   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
992   /* handle off diagonal part of matrix */
993   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<len; i++) bb[lrows[i]] = 1;
997   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
998   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1000   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1001   if (x && b) { /* this code is buggy when the row and column layouts don't match */
1002     PetscBool cong;
1003 
1004     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1005     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1006     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1009     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1010   }
1011   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1012   /* remove zeroed rows of off diagonal matrix */
1013   ii = aij->i;
1014   for (i=0; i<len; i++) {
1015     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1016   }
1017   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1018   if (aij->compressedrow.use) {
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++) {
1023       n  = ii[i+1] - ii[i];
1024       aj = aij->j + ii[i];
1025       aa = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n  = ii[i+1] - ii[i];
1041       aj = aij->j + ii[i];
1042       aa = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x && b) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* only change matrix nonzero state if pattern was allowed to be changed */
1062   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1063     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1064     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1070 {
1071   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1072   PetscErrorCode ierr;
1073   PetscInt       nt;
1074   VecScatter     Mvctx = a->Mvctx;
1075 
1076   PetscFunctionBegin;
1077   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1078   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1079 
1080   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1082   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
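/*
   The multiply above overlaps communication with computation: while the scatter
   brings in the ghost values of xx needed by the off-diagonal block, the diagonal
   block is applied locally, i.e. (in the notation of this file)

      yy = a->A * xx + a->B * a->lvec,   a->lvec = ghost entries of xx,

   so only the second product has to wait for VecScatterEnd().
*/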
1086 
1087 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1088 {
1089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1090   PetscErrorCode ierr;
1091 
1092   PetscFunctionBegin;
1093   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100   PetscErrorCode ierr;
1101   VecScatter     Mvctx = a->Mvctx;
1102 
1103   PetscFunctionBegin;
1104   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1105   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1106   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1107   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1113 {
1114   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1115   PetscErrorCode ierr;
1116 
1117   PetscFunctionBegin;
1118   /* do nondiagonal part */
1119   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1120   /* do local part */
1121   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1122   /* add partial results together */
1123   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1124   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1129 {
1130   MPI_Comm       comm;
1131   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1132   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1133   IS             Me,Notme;
1134   PetscErrorCode ierr;
1135   PetscInt       M,N,first,last,*notme,i;
1136   PetscBool      lf;
1137   PetscMPIInt    size;
1138 
1139   PetscFunctionBegin;
1140   /* Easy test: symmetric diagonal block */
1141   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1142   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1143   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1144   if (!*f) PetscFunctionReturn(0);
1145   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1146   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1147   if (size == 1) PetscFunctionReturn(0);
1148 
1149   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1150   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1151   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1152   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1153   for (i=0; i<first; i++) notme[i] = i;
1154   for (i=last; i<M; i++) notme[i-last+first] = i;
1155   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1156   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1157   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1158   Aoff = Aoffs[0];
1159   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1160   Boff = Boffs[0];
1161   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1162   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1163   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1164   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1165   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1166   ierr = PetscFree(notme);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1171 {
1172   PetscErrorCode ierr;
1173 
1174   PetscFunctionBegin;
1175   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1180 {
1181   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1182   PetscErrorCode ierr;
1183 
1184   PetscFunctionBegin;
1185   /* do nondiagonal part */
1186   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1187   /* do local part */
1188   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1189   /* add partial results together */
1190   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 /*
1196   This only works correctly for square matrices where the subblock A->A is the
1197    diagonal block
1198 */
1199 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1200 {
1201   PetscErrorCode ierr;
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203 
1204   PetscFunctionBegin;
1205   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1206   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1207   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1212 {
1213   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1214   PetscErrorCode ierr;
1215 
1216   PetscFunctionBegin;
1217   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1218   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1223 {
1224   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1225   PetscErrorCode ierr;
1226 
1227   PetscFunctionBegin;
1228 #if defined(PETSC_USE_LOG)
1229   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1230 #endif
1231   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1232   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1233   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1234   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1235 #if defined(PETSC_USE_CTABLE)
1236   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1237 #else
1238   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1239 #endif
1240   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1241   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1242   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1243   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1244   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1245   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1246   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1247 
1248   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1250   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1252   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1253   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1254   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1255   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1256   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1257 #if defined(PETSC_HAVE_ELEMENTAL)
1258   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1259 #endif
1260 #if defined(PETSC_HAVE_HYPRE)
1261   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1262   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1265   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1270 {
1271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1272   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1273   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1274   PetscErrorCode ierr;
1275   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1276   int            fd;
1277   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1278   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1279   PetscScalar    *column_values;
1280   PetscInt       message_count,flowcontrolcount;
1281   FILE           *file;
1282 
1283   PetscFunctionBegin;
1284   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1285   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1286   nz   = A->nz + B->nz;
1287   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1288   if (!rank) {
1289     header[0] = MAT_FILE_CLASSID;
1290     header[1] = mat->rmap->N;
1291     header[2] = mat->cmap->N;
1292 
1293     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1294     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1295     /* get largest number of rows any processor has */
1296     rlen  = mat->rmap->n;
1297     range = mat->rmap->range;
1298     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1299   } else {
1300     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1301     rlen = mat->rmap->n;
1302   }
1303 
1304   /* load up the local row counts */
1305   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1306   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1307 
1308   /* store the row lengths to the file */
1309   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1310   if (!rank) {
1311     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1312     for (i=1; i<size; i++) {
1313       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1314       rlen = range[i+1] - range[i];
1315       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1317     }
1318     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1319   } else {
1320     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1321     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1323   }
1324   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1325 
1326   /* load up the local column indices */
1327   nzmax = nz; /* rank 0 needs space for the largest number of nonzeros on any process; the reduction below computes it */
1328   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1329   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1330   cnt   = 0;
1331   for (i=0; i<mat->rmap->n; i++) {
1332     for (j=B->i[i]; j<B->i[i+1]; j++) {
1333       if ((col = garray[B->j[j]]) > cstart) break;
1334       column_indices[cnt++] = col;
1335     }
1336     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1337     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1338   }
1339   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1340 
1341   /* store the column indices to the file */
1342   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1343   if (!rank) {
1344     MPI_Status status;
1345     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1346     for (i=1; i<size; i++) {
1347       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1348       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1349       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1350       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1351       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     }
1353     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1354   } else {
1355     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1356     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1358     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1359   }
1360   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1361 
1362   /* load up the local column values */
1363   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1364   cnt  = 0;
1365   for (i=0; i<mat->rmap->n; i++) {
1366     for (j=B->i[i]; j<B->i[i+1]; j++) {
1367       if (garray[B->j[j]] > cstart) break;
1368       column_values[cnt++] = B->a[j];
1369     }
1370     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1371     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1372   }
1373   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1374 
1375   /* store the column values to the file */
1376   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1377   if (!rank) {
1378     MPI_Status status;
1379     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1380     for (i=1; i<size; i++) {
1381       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1382       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1383       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1384       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1385       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1386     }
1387     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1388   } else {
1389     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1390     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1392     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1393   }
1394   ierr = PetscFree(column_values);CHKERRQ(ierr);
1395 
1396   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1397   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1398   PetscFunctionReturn(0);
1399 }
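
/*
   A minimal usage sketch for the binary dump implemented above (the file name "A.dat" is illustrative;
   error checking shortened to CHKERRQ):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)mat),"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   On more than one process MatView() dispatches to MatView_MPIAIJ_Binary(); the file can later be read
   back with MatLoad() into a matrix of type MATAIJ.
*/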
1400 
1401 #include <petscdraw.h>
1402 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1403 {
1404   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1405   PetscErrorCode    ierr;
1406   PetscMPIInt       rank = aij->rank,size = aij->size;
1407   PetscBool         isdraw,iascii,isbinary;
1408   PetscViewer       sviewer;
1409   PetscViewerFormat format;
1410 
1411   PetscFunctionBegin;
1412   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1414   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1415   if (iascii) {
1416     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1417     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1418       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1419       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1420       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1421       for (i=0; i<(PetscInt)size; i++) {
1422         nmax = PetscMax(nmax,nz[i]);
1423         nmin = PetscMin(nmin,nz[i]);
1424         navg += nz[i];
1425       }
1426       ierr = PetscFree(nz);CHKERRQ(ierr);
1427       navg = navg/size;
1428       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1429       PetscFunctionReturn(0);
1430     }
1431     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1432     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1433       MatInfo   info;
1434       PetscBool inodes;
1435 
1436       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1437       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1438       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1440       if (!inodes) {
1441         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1442                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1443       } else {
1444         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1445                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1446       }
1447       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1449       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1450       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1451       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1452       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1454       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1455       PetscFunctionReturn(0);
1456     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1457       PetscInt inodecount,inodelimit,*inodes;
1458       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1459       if (inodes) {
1460         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1461       } else {
1462         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1463       }
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1466       PetscFunctionReturn(0);
1467     }
1468   } else if (isbinary) {
1469     if (size == 1) {
1470       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1471       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1472     } else {
1473       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1474     }
1475     PetscFunctionReturn(0);
1476   } else if (iascii && size == 1) {
1477     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1478     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1479     PetscFunctionReturn(0);
1480   } else if (isdraw) {
1481     PetscDraw draw;
1482     PetscBool isnull;
1483     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1484     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1485     if (isnull) PetscFunctionReturn(0);
1486   }
1487 
1488   { /* assemble the entire matrix onto first processor */
1489     Mat A = NULL, Av;
1490     IS  isrow,iscol;
1491 
1492     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1493     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1494     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1495     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1496 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1497 /*
1498     Mat *AA, A = NULL, Av;
1499     IS  isrow,iscol;
1500 
1501     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1502     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1503     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1504     if (!rank) {
1505        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1506        A    = AA[0];
1507        Av   = AA[0];
1508     }
1509     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1510 */
1511     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1512     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1513     /*
1514        Everyone has to call to draw the matrix since the graphics waits are
1515        synchronized across all processors that share the PetscDraw object
1516     */
1517     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1518     if (!rank) {
1519       if (((PetscObject)mat)->name) {
1520         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1521       }
1522       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1523     }
1524     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1525     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1526     ierr = MatDestroy(&A);CHKERRQ(ierr);
1527   }
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1532 {
1533   PetscErrorCode ierr;
1534   PetscBool      iascii,isdraw,issocket,isbinary;
1535 
1536   PetscFunctionBegin;
1537   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1538   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1539   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1540   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1541   if (iascii || isdraw || isbinary || issocket) {
1542     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1543   }
1544   PetscFunctionReturn(0);
1545 }
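
/*
   A small sketch of driving the viewer logic above (assumes an assembled MATMPIAIJ matrix A; error
   checks shortened):

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   With PETSC_VIEWER_LOAD_BALANCE only the min/avg/max nonzero counts over the ranks are printed, which
   is useful for diagnosing partition quality; the same output can be requested on the command line with
   -mat_view ::load_balance.
*/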
1546 
1547 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1548 {
1549   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1550   PetscErrorCode ierr;
1551   Vec            bb1 = 0;
1552   PetscBool      hasop;
1553 
1554   PetscFunctionBegin;
1555   if (flag == SOR_APPLY_UPPER) {
1556     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1557     PetscFunctionReturn(0);
1558   }
1559 
1560   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1561     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1562   }
1563 
1564   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1565     if (flag & SOR_ZERO_INITIAL_GUESS) {
1566       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1567       its--;
1568     }
1569 
1570     while (its--) {
1571       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573 
1574       /* update rhs: bb1 = bb - B*x */
1575       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1576       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1577 
1578       /* local sweep */
1579       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1580     }
1581   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1582     if (flag & SOR_ZERO_INITIAL_GUESS) {
1583       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1584       its--;
1585     }
1586     while (its--) {
1587       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1588       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589 
1590       /* update rhs: bb1 = bb - B*x */
1591       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1592       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1593 
1594       /* local sweep */
1595       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1596     }
1597   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1598     if (flag & SOR_ZERO_INITIAL_GUESS) {
1599       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1600       its--;
1601     }
1602     while (its--) {
1603       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605 
1606       /* update rhs: bb1 = bb - B*x */
1607       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1608       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1609 
1610       /* local sweep */
1611       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1612     }
1613   } else if (flag & SOR_EISENSTAT) {
1614     Vec xx1;
1615 
1616     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1617     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1618 
1619     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1620     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1621     if (!mat->diag) {
1622       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1623       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1624     }
1625     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1626     if (hasop) {
1627       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1628     } else {
1629       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1630     }
1631     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1632 
1633     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1634 
1635     /* local sweep */
1636     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1637     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1638     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1639   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1640 
1641   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1642 
1643   matin->factorerrortype = mat->A->factorerrortype;
1644   PetscFunctionReturn(0);
1645 }
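
/*
   Note that MatSOR_MPIAIJ() only implements *local* sweeps: each iteration scatters the current ghost
   values, folds them into the right-hand side through the off-diagonal block (bb1 = bb - B*x), and then
   relaxes on the diagonal block alone. A sketch of invoking it through the public interface (vectors b
   and x are assumed to match the matrix layout; error checks shortened):

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,5,1,x);CHKERRQ(ierr);

   This performs 5 outer iterations with 1 local sub-iteration each; the same kernel is what PCSOR uses
   on MPIAIJ matrices.
*/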
1646 
1647 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1648 {
1649   Mat            aA,aB,Aperm;
1650   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1651   PetscScalar    *aa,*ba;
1652   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1653   PetscSF        rowsf,sf;
1654   IS             parcolp = NULL;
1655   PetscBool      done;
1656   PetscErrorCode ierr;
1657 
1658   PetscFunctionBegin;
1659   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1660   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1661   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1662   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1663 
1664   /* Invert row permutation to find out where my rows should go */
1665   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1666   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1667   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1668   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1669   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1670   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1671 
1672   /* Invert column permutation to find out where my columns should go */
1673   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1674   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1675   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1676   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1677   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1678   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1679   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1680 
1681   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1682   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1683   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1684 
1685   /* Find out where my gcols should go */
1686   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1687   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1688   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1689   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1690   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1691   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1692   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1693   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1694 
1695   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1696   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1697   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1698   for (i=0; i<m; i++) {
1699     PetscInt row = rdest[i],rowner;
1700     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1701     for (j=ai[i]; j<ai[i+1]; j++) {
1702       PetscInt cowner,col = cdest[aj[j]];
1703       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1704       if (rowner == cowner) dnnz[i]++;
1705       else onnz[i]++;
1706     }
1707     for (j=bi[i]; j<bi[i+1]; j++) {
1708       PetscInt cowner,col = gcdest[bj[j]];
1709       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1710       if (rowner == cowner) dnnz[i]++;
1711       else onnz[i]++;
1712     }
1713   }
1714   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1715   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1716   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1717   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1718   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1719 
1720   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1721   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1722   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1723   for (i=0; i<m; i++) {
1724     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1725     PetscInt j0,rowlen;
1726     rowlen = ai[i+1] - ai[i];
1727     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1728       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1729       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1730     }
1731     rowlen = bi[i+1] - bi[i];
1732     for (j0=j=0; j<rowlen; j0=j) {
1733       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1734       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1735     }
1736   }
1737   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1738   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1739   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1740   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1741   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1742   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1743   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1744   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1745   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1746   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1747   *B = Aperm;
1748   PetscFunctionReturn(0);
1749 }
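
/*
   A usage sketch for the permutation above; rowp and colp are parallel index sets whose local entries
   give, for each row/column of the permuted matrix, the global row/column of A it comes from (the
   identity here, purely for illustration; error checks shortened):

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt rstart,rend,cstart,cend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
*/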
1750 
1751 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1752 {
1753   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1754   PetscErrorCode ierr;
1755 
1756   PetscFunctionBegin;
1757   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1758   if (ghosts) *ghosts = aij->garray;
1759   PetscFunctionReturn(0);
1760 }
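
/*
   The ghosts returned above are the global column indices of the off-diagonal block (the garray). A
   typical use is building a compatible ghosted work vector; a sketch with error checks shortened:

     PetscInt       m,n,nghost;
     const PetscInt *ghosts;
     Vec            w;

     ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
     ierr = MatGetGhosts(mat,&nghost,&ghosts);CHKERRQ(ierr);
     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),n,PETSC_DECIDE,nghost,ghosts,&w);CHKERRQ(ierr);
*/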
1761 
1762 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1763 {
1764   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1765   Mat            A    = mat->A,B = mat->B;
1766   PetscErrorCode ierr;
1767   PetscReal      isend[5],irecv[5];
1768 
1769   PetscFunctionBegin;
1770   info->block_size = 1.0;
1771   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1772 
1773   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1774   isend[3] = info->memory;  isend[4] = info->mallocs;
1775 
1776   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1777 
1778   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1779   isend[3] += info->memory;  isend[4] += info->mallocs;
1780   if (flag == MAT_LOCAL) {
1781     info->nz_used      = isend[0];
1782     info->nz_allocated = isend[1];
1783     info->nz_unneeded  = isend[2];
1784     info->memory       = isend[3];
1785     info->mallocs      = isend[4];
1786   } else if (flag == MAT_GLOBAL_MAX) {
1787     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1788 
1789     info->nz_used      = irecv[0];
1790     info->nz_allocated = irecv[1];
1791     info->nz_unneeded  = irecv[2];
1792     info->memory       = irecv[3];
1793     info->mallocs      = irecv[4];
1794   } else if (flag == MAT_GLOBAL_SUM) {
1795     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1796 
1797     info->nz_used      = irecv[0];
1798     info->nz_allocated = irecv[1];
1799     info->nz_unneeded  = irecv[2];
1800     info->memory       = irecv[3];
1801     info->mallocs      = irecv[4];
1802   }
1803   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1804   info->fill_ratio_needed = 0;
1805   info->factor_mallocs    = 0;
1806   PetscFunctionReturn(0);
1807 }
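
/*
   Sketch of querying the statistics assembled above; MAT_LOCAL reports the per-process totals of the
   diagonal and off-diagonal blocks, while MAT_GLOBAL_MAX/MAT_GLOBAL_SUM reduce them over the
   communicator (error checks shortened):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  allocated %g  mallocs %g\n",info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/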
1808 
1809 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1810 {
1811   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1812   PetscErrorCode ierr;
1813 
1814   PetscFunctionBegin;
1815   switch (op) {
1816   case MAT_NEW_NONZERO_LOCATIONS:
1817   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1818   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1819   case MAT_KEEP_NONZERO_PATTERN:
1820   case MAT_NEW_NONZERO_LOCATION_ERR:
1821   case MAT_USE_INODES:
1822   case MAT_IGNORE_ZERO_ENTRIES:
1823     MatCheckPreallocated(A,1);
1824     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1825     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1826     break;
1827   case MAT_ROW_ORIENTED:
1828     MatCheckPreallocated(A,1);
1829     a->roworiented = flg;
1830 
1831     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1832     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1833     break;
1834   case MAT_NEW_DIAGONALS:
1835   case MAT_SORTED_FULL:
1836     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1837     break;
1838   case MAT_IGNORE_OFF_PROC_ENTRIES:
1839     a->donotstash = flg;
1840     break;
1841   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1842   case MAT_SPD:
1843   case MAT_SYMMETRIC:
1844   case MAT_STRUCTURALLY_SYMMETRIC:
1845   case MAT_HERMITIAN:
1846   case MAT_SYMMETRY_ETERNAL:
1847     break;
1848   case MAT_SUBMAT_SINGLEIS:
1849     A->submat_singleis = flg;
1850     break;
1851   case MAT_STRUCTURE_ONLY:
1852     /* The option is handled directly by MatSetOption() */
1853     break;
1854   default:
1855     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1856   }
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1861 {
1862   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1863   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1864   PetscErrorCode ierr;
1865   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1866   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1867   PetscInt       *cmap,*idx_p;
1868 
1869   PetscFunctionBegin;
1870   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1871   mat->getrowactive = PETSC_TRUE;
1872 
1873   if (!mat->rowvalues && (idx || v)) {
1874     /*
1875         allocate enough space to hold information from the longest row.
1876     */
1877     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1878     PetscInt   max = 1,tmp;
1879     for (i=0; i<matin->rmap->n; i++) {
1880       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1881       if (max < tmp) max = tmp;
1882     }
1883     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1884   }
1885 
1886   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1887   lrow = row - rstart;
1888 
1889   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1890   if (!v)   {pvA = 0; pvB = 0;}
1891   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1892   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1893   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1894   nztot = nzA + nzB;
1895 
1896   cmap = mat->garray;
1897   if (v  || idx) {
1898     if (nztot) {
1899       /* Sort by increasing column numbers, assuming A and B already sorted */
1900       PetscInt imark = -1;
1901       if (v) {
1902         *v = v_p = mat->rowvalues;
1903         for (i=0; i<nzB; i++) {
1904           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1905           else break;
1906         }
1907         imark = i;
1908         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1909         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1910       }
1911       if (idx) {
1912         *idx = idx_p = mat->rowindices;
1913         if (imark > -1) {
1914           for (i=0; i<imark; i++) {
1915             idx_p[i] = cmap[cworkB[i]];
1916           }
1917         } else {
1918           for (i=0; i<nzB; i++) {
1919             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1920             else break;
1921           }
1922           imark = i;
1923         }
1924         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1925         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1926       }
1927     } else {
1928       if (idx) *idx = 0;
1929       if (v)   *v   = 0;
1930     }
1931   }
1932   *nz  = nztot;
1933   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1934   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1939 {
1940   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1941 
1942   PetscFunctionBegin;
1943   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1944   aij->getrowactive = PETSC_FALSE;
1945   PetscFunctionReturn(0);
1946 }
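
/*
   The getrow/restorerow pair above presents the diagonal and off-diagonal storage as one row sorted by
   global column. Only locally owned rows may be queried, and each MatGetRow() must be matched by a
   MatRestoreRow() before the next call; a sketch (error checks shortened):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/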
1947 
1948 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1949 {
1950   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1951   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1952   PetscErrorCode ierr;
1953   PetscInt       i,j,cstart = mat->cmap->rstart;
1954   PetscReal      sum = 0.0;
1955   MatScalar      *v;
1956 
1957   PetscFunctionBegin;
1958   if (aij->size == 1) {
1959     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1960   } else {
1961     if (type == NORM_FROBENIUS) {
1962       v = amat->a;
1963       for (i=0; i<amat->nz; i++) {
1964         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1965       }
1966       v = bmat->a;
1967       for (i=0; i<bmat->nz; i++) {
1968         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1969       }
1970       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       *norm = PetscSqrtReal(*norm);
1972       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1973     } else if (type == NORM_1) { /* max column norm */
1974       PetscReal *tmp,*tmp2;
1975       PetscInt  *jj,*garray = aij->garray;
1976       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1977       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1978       *norm = 0.0;
1979       v     = amat->a; jj = amat->j;
1980       for (j=0; j<amat->nz; j++) {
1981         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1982       }
1983       v = bmat->a; jj = bmat->j;
1984       for (j=0; j<bmat->nz; j++) {
1985         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1986       }
1987       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1988       for (j=0; j<mat->cmap->N; j++) {
1989         if (tmp2[j] > *norm) *norm = tmp2[j];
1990       }
1991       ierr = PetscFree(tmp);CHKERRQ(ierr);
1992       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1993       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1994     } else if (type == NORM_INFINITY) { /* max row norm */
1995       PetscReal ntemp = 0.0;
1996       for (j=0; j<aij->A->rmap->n; j++) {
1997         v   = amat->a + amat->i[j];
1998         sum = 0.0;
1999         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2000           sum += PetscAbsScalar(*v); v++;
2001         }
2002         v = bmat->a + bmat->i[j];
2003         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2004           sum += PetscAbsScalar(*v); v++;
2005         }
2006         if (sum > ntemp) ntemp = sum;
2007       }
2008       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2009       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2010     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2011   }
2012   PetscFunctionReturn(0);
2013 }
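
/*
   The communication costs of the three supported norms differ: NORM_FROBENIUS and NORM_INFINITY each
   need a single-scalar all-reduce, whereas NORM_1 all-reduces an array of all N global column sums and
   hence does not scale to very wide matrices. A sketch (error checks shortened):

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/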
2014 
2015 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2016 {
2017   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2018   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2019   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2020   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2021   PetscErrorCode  ierr;
2022   Mat             B,A_diag,*B_diag;
2023   const MatScalar *array;
2024 
2025   PetscFunctionBegin;
2026   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2027   ai = Aloc->i; aj = Aloc->j;
2028   bi = Bloc->i; bj = Bloc->j;
2029   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2030     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2031     PetscSFNode          *oloc;
2032     PETSC_UNUSED PetscSF sf;
2033 
2034     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2035     /* compute d_nnz for preallocation */
2036     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2037     for (i=0; i<ai[ma]; i++) {
2038       d_nnz[aj[i]]++;
2039     }
2040     /* compute local off-diagonal contributions */
2041     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2042     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2043     /* map those to global */
2044     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2045     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2046     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2047     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2048     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2049     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2050     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2051 
2052     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2053     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2054     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2055     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2056     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2057     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2058   } else {
2059     B    = *matout;
2060     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2061   }
2062 
2063   b           = (Mat_MPIAIJ*)B->data;
2064   A_diag      = a->A;
2065   B_diag      = &b->A;
2066   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2067   A_diag_ncol = A_diag->cmap->N;
2068   B_diag_ilen = sub_B_diag->ilen;
2069   B_diag_i    = sub_B_diag->i;
2070 
2071   /* Set ilen for diagonal of B */
2072   for (i=0; i<A_diag_ncol; i++) {
2073     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2074   }
2075 
2076   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2077      very quickly (i.e., without using MatSetValues()) because all writes are local. */
2078   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2079 
2080   /* copy over the B part */
2081   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2082   array = Bloc->a;
2083   row   = A->rmap->rstart;
2084   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2085   cols_tmp = cols;
2086   for (i=0; i<mb; i++) {
2087     ncol = bi[i+1]-bi[i];
2088     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2089     row++;
2090     array += ncol; cols_tmp += ncol;
2091   }
2092   ierr = PetscFree(cols);CHKERRQ(ierr);
2093 
2094   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2097     *matout = B;
2098   } else {
2099     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2100   }
2101   PetscFunctionReturn(0);
2102 }
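
/*
   Usage sketch for the transpose above (error checks shortened). MAT_INITIAL_MATRIX allocates the
   result; MAT_REUSE_MATRIX refills a matrix previously produced this way; passing &A itself triggers
   the header-merge branch at the end, replacing A in place:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ...
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
*/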
2103 
2104 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2105 {
2106   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2107   Mat            a    = aij->A,b = aij->B;
2108   PetscErrorCode ierr;
2109   PetscInt       s1,s2,s3;
2110 
2111   PetscFunctionBegin;
2112   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2113   if (rr) {
2114     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2115     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2116     /* Overlap communication with computation. */
2117     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118   }
2119   if (ll) {
2120     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2121     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2122     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2123   }
2124   /* scale the diagonal block */
2125   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2126 
2127   if (rr) {
2128     /* Do a scatter end and then right scale the off-diagonal block */
2129     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2130     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2131   }
2132   PetscFunctionReturn(0);
2133 }
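
/*
   The routine above computes A <- diag(ll) * A * diag(rr). A sketch of the public call, with vectors
   laid out like the rows (l) and columns (r) of A (error checks shortened):

     Vec l,r;
     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/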
2134 
2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2136 {
2137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2146 {
2147   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2148   Mat            a,b,c,d;
2149   PetscBool      flg;
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   a = matA->A; b = matA->B;
2154   c = matB->A; d = matB->B;
2155 
2156   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2157   if (flg) {
2158     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2159   }
2160   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2165 {
2166   PetscErrorCode ierr;
2167   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2168   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2169 
2170   PetscFunctionBegin;
2171   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2172   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2173     /* because of the column compression in the off-process part of the matrix a->B,
2174        the number of columns in a->B and b->B may differ, hence we cannot call
2175        MatCopy() directly on the two parts. If need be, a copy more efficient than
2176        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2177        and then copying the submatrices */
2178     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2179   } else {
2180     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2181     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2182   }
2183   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2188 {
2189   PetscErrorCode ierr;
2190 
2191   PetscFunctionBegin;
2192   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 /*
2197    Computes the number of nonzeros per row needed for preallocation when X and Y
2198    have different nonzero structure.
2199 */
2200 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2201 {
2202   PetscInt       i,j,k,nzx,nzy;
2203 
2204   PetscFunctionBegin;
2205   /* Set the number of nonzeros in the new matrix */
2206   for (i=0; i<m; i++) {
2207     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2208     nzx = xi[i+1] - xi[i];
2209     nzy = yi[i+1] - yi[i];
2210     nnz[i] = 0;
2211     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2212       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2213       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2214       nnz[i]++;
2215     }
2216     for (; k<nzy; k++) nnz[i]++;
2217   }
2218   PetscFunctionReturn(0);
2219 }
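
/*
   The loop above is a two-pointer merge of the sorted global column lists of one row of X and one row
   of Y. For example, with global columns X = {1,4,7} and Y = {2,4,9} the merged row is {1,2,4,7,9},
   so nnz = 5: the catch-up loop contributes Y's 2, each X entry contributes one (with Y's matching 4
   skipped as a duplicate), and the final loop picks up Y's trailing 9.
*/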
2220 
2221 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2222 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2223 {
2224   PetscErrorCode ierr;
2225   PetscInt       m = Y->rmap->N;
2226   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2227   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2235 {
2236   PetscErrorCode ierr;
2237   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2238   PetscBLASInt   bnz,one=1;
2239   Mat_SeqAIJ     *x,*y;
2240 
2241   PetscFunctionBegin;
2242   if (str == SAME_NONZERO_PATTERN) {
2243     PetscScalar alpha = a;
2244     x    = (Mat_SeqAIJ*)xx->A->data;
2245     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2246     y    = (Mat_SeqAIJ*)yy->A->data;
2247     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2248     x    = (Mat_SeqAIJ*)xx->B->data;
2249     y    = (Mat_SeqAIJ*)yy->B->data;
2250     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2251     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2252     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2253     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/End(), which updates the copy of the
2254        matrix on the GPU; here the values were changed directly, so mark the CPU copy as current */
2255 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2256     if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
2257       Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
2258     }
2259 #endif
2260   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2261     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2262   } else {
2263     Mat      B;
2264     PetscInt *nnz_d,*nnz_o;
2265     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2266     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2267     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2268     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2269     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2270     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2271     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2272     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2273     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2274     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2275     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2276     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2277     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2278     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2279   }
2280   PetscFunctionReturn(0);
2281 }
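
/*
   Usage sketch for Y <- a*X + Y (error checks shortened). SAME_NONZERO_PATTERN takes the direct BLAS
   axpy path above, SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(), and DIFFERENT_NONZERO_PATTERN
   preallocates and rebuilds Y with the merged pattern:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/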
2282 
2283 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2284 
2285 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2286 {
2287 #if defined(PETSC_USE_COMPLEX)
2288   PetscErrorCode ierr;
2289   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2290 
2291   PetscFunctionBegin;
2292   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2293   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2294 #else
2295   PetscFunctionBegin;
2296 #endif
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2301 {
2302   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2303   PetscErrorCode ierr;
2304 
2305   PetscFunctionBegin;
2306   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2307   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2312 {
2313   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2318   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2323 {
2324   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2325   PetscErrorCode ierr;
2326   PetscInt       i,*idxb = 0;
2327   PetscScalar    *va,*vb;
2328   Vec            vtmp;
2329 
2330   PetscFunctionBegin;
2331   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2332   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2333   if (idx) {
2334     for (i=0; i<A->rmap->n; i++) {
2335       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2336     }
2337   }
2338 
2339   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2340   if (idx) {
2341     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2342   }
2343   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2344   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2345 
2346   for (i=0; i<A->rmap->n; i++) {
2347     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2348       va[i] = vb[i];
2349       if (idx) idx[i] = a->garray[idxb[i]];
2350     }
2351   }
2352 
2353   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2354   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2355   ierr = PetscFree(idxb);CHKERRQ(ierr);
2356   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2361 {
2362   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2363   PetscErrorCode ierr;
2364   PetscInt       i,*idxb = 0;
2365   PetscScalar    *va,*vb;
2366   Vec            vtmp;
2367 
2368   PetscFunctionBegin;
2369   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2370   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2371   if (idx) {
2372     for (i=0; i<A->rmap->n; i++) {
2373       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2374     }
2375   }
2376 
2377   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2378   if (idx) {
2379     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2380   }
2381   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2382   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2383 
2384   for (i=0; i<A->rmap->n; i++) {
2385     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2386       va[i] = vb[i];
2387       if (idx) idx[i] = a->garray[idxb[i]];
2388     }
2389   }
2390 
2391   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2392   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2393   ierr = PetscFree(idxb);CHKERRQ(ierr);
2394   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2399 {
2400   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2401   PetscInt       n      = A->rmap->n;
2402   PetscInt       cstart = A->cmap->rstart;
2403   PetscInt       *cmap  = mat->garray;
2404   PetscInt       *diagIdx, *offdiagIdx;
2405   Vec            diagV, offdiagV;
2406   PetscScalar    *a, *diagA, *offdiagA;
2407   PetscInt       r;
2408   PetscErrorCode ierr;
2409 
2410   PetscFunctionBegin;
2411   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2412   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2413   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2414   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2415   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2416   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2417   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2418   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   for (r = 0; r < n; ++r) {
2420     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2421       a[r]   = diagA[r];
2422       idx[r] = cstart + diagIdx[r];
2423     } else {
2424       a[r]   = offdiagA[r];
2425       idx[r] = cmap[offdiagIdx[r]];
2426     }
2427   }
2428   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2429   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2430   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2431   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2432   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2433   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2438 {
2439   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2440   PetscInt       n      = A->rmap->n;
2441   PetscInt       cstart = A->cmap->rstart;
2442   PetscInt       *cmap  = mat->garray;
2443   PetscInt       *diagIdx, *offdiagIdx;
2444   Vec            diagV, offdiagV;
2445   PetscScalar    *a, *diagA, *offdiagA;
2446   PetscInt       r;
2447   PetscErrorCode ierr;
2448 
2449   PetscFunctionBegin;
2450   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2451   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2452   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2453   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2454   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2456   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2457   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2458   for (r = 0; r < n; ++r) {
2459     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2460       a[r]   = diagA[r];
2461       idx[r] = cstart + diagIdx[r];
2462     } else {
2463       a[r]   = offdiagA[r];
2464       idx[r] = cmap[offdiagIdx[r]];
2465     }
2466   }
2467   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2468   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2469   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2470   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2471   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2472   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2477 {
2478   PetscErrorCode ierr;
2479   Mat            *dummy;
2480 
2481   PetscFunctionBegin;
2482   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2483   *newmat = *dummy;
2484   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2489 {
2490   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2491   PetscErrorCode ierr;
2492 
2493   PetscFunctionBegin;
2494   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2495   A->factorerrortype = a->A->factorerrortype;
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2500 {
2501   PetscErrorCode ierr;
2502   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2503 
2504   PetscFunctionBegin;
2505   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2506   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2507   if (x->assembled) {
2508     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2509   } else {
2510     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2511   }
2512   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2513   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2518 {
2519   PetscFunctionBegin;
2520   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2521   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 /*@
2526    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2527 
2528    Collective on Mat
2529 
2530    Input Parameters:
2531 +    A - the matrix
2532 -    sc - PETSC_TRUE to use the scalable algorithm (the default is PETSC_FALSE, i.e. the non-scalable algorithm)
2533 
2534    Level: advanced
2535 
2536 @*/
2537 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2538 {
2539   PetscErrorCode       ierr;
2540 
2541   PetscFunctionBegin;
2542   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
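/*
   A usage sketch for the routine above (assumes A is an assembled MATMPIAIJ and is is an
   array of n index sets):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,n,is,2);CHKERRQ(ierr);

   The same switch is available from the options database as -mat_increase_overlap_scalable,
   handled in MatSetFromOptions_MPIAIJ() below.
*/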
2545 
2546 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2547 {
2548   PetscErrorCode       ierr;
2549   PetscBool            sc = PETSC_FALSE,flg;
2550 
2551   PetscFunctionBegin;
2552   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2553   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2554   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2555   if (flg) {
2556     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2557   }
2558   ierr = PetscOptionsTail();CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2563 {
2564   PetscErrorCode ierr;
2565   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2566   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2567 
2568   PetscFunctionBegin;
2569   if (!Y->preallocated) {
2570     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2571   } else if (!aij->nz) {
2572     PetscInt nonew = aij->nonew;
2573     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2574     aij->nonew = nonew;
2575   }
2576   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2581 {
2582   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2583   PetscErrorCode ierr;
2584 
2585   PetscFunctionBegin;
2586   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2587   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2588   if (d) {
2589     PetscInt rstart;
2590     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2591     *d += rstart;
2592 
2593   }
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2598 {
2599   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2600   PetscErrorCode ierr;
2601 
2602   PetscFunctionBegin;
2603   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 /* -------------------------------------------------------------------*/
2608 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2609                                        MatGetRow_MPIAIJ,
2610                                        MatRestoreRow_MPIAIJ,
2611                                        MatMult_MPIAIJ,
2612                                 /* 4*/ MatMultAdd_MPIAIJ,
2613                                        MatMultTranspose_MPIAIJ,
2614                                        MatMultTransposeAdd_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*10*/ 0,
2619                                        0,
2620                                        0,
2621                                        MatSOR_MPIAIJ,
2622                                        MatTranspose_MPIAIJ,
2623                                 /*15*/ MatGetInfo_MPIAIJ,
2624                                        MatEqual_MPIAIJ,
2625                                        MatGetDiagonal_MPIAIJ,
2626                                        MatDiagonalScale_MPIAIJ,
2627                                        MatNorm_MPIAIJ,
2628                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2629                                        MatAssemblyEnd_MPIAIJ,
2630                                        MatSetOption_MPIAIJ,
2631                                        MatZeroEntries_MPIAIJ,
2632                                 /*24*/ MatZeroRows_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*29*/ MatSetUp_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        MatGetDiagonalBlock_MPIAIJ,
2641                                        0,
2642                                 /*34*/ MatDuplicate_MPIAIJ,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                 /*39*/ MatAXPY_MPIAIJ,
2648                                        MatCreateSubMatrices_MPIAIJ,
2649                                        MatIncreaseOverlap_MPIAIJ,
2650                                        MatGetValues_MPIAIJ,
2651                                        MatCopy_MPIAIJ,
2652                                 /*44*/ MatGetRowMax_MPIAIJ,
2653                                        MatScale_MPIAIJ,
2654                                        MatShift_MPIAIJ,
2655                                        MatDiagonalSet_MPIAIJ,
2656                                        MatZeroRowsColumns_MPIAIJ,
2657                                 /*49*/ MatSetRandom_MPIAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2663                                        0,
2664                                        MatSetUnfactored_MPIAIJ,
2665                                        MatPermute_MPIAIJ,
2666                                        0,
2667                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2668                                        MatDestroy_MPIAIJ,
2669                                        MatView_MPIAIJ,
2670                                        0,
2671                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2672                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2673                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2678                                        MatGetRowMinAbs_MPIAIJ,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                 /*75*/ MatFDColoringApply_AIJ,
2684                                        MatSetFromOptions_MPIAIJ,
2685                                        0,
2686                                        0,
2687                                        MatFindZeroDiagonals_MPIAIJ,
2688                                 /*80*/ 0,
2689                                        0,
2690                                        0,
2691                                 /*83*/ MatLoad_MPIAIJ,
2692                                        MatIsSymmetric_MPIAIJ,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2698                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2699                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2700                                        MatPtAP_MPIAIJ_MPIAIJ,
2701                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2702                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                 /*99*/ 0,
2708                                        0,
2709                                        0,
2710                                        MatConjugate_MPIAIJ,
2711                                        0,
2712                                 /*104*/MatSetValuesRow_MPIAIJ,
2713                                        MatRealPart_MPIAIJ,
2714                                        MatImaginaryPart_MPIAIJ,
2715                                        0,
2716                                        0,
2717                                 /*109*/0,
2718                                        0,
2719                                        MatGetRowMin_MPIAIJ,
2720                                        0,
2721                                        MatMissingDiagonal_MPIAIJ,
2722                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2723                                        0,
2724                                        MatGetGhosts_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                 /*119*/0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        MatGetMultiProcBlock_MPIAIJ,
2732                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2733                                        MatGetColumnNorms_MPIAIJ,
2734                                        MatInvertBlockDiagonal_MPIAIJ,
2735                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2736                                        MatCreateSubMatricesMPI_MPIAIJ,
2737                                 /*129*/0,
2738                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2739                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2740                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2741                                        0,
2742                                 /*134*/0,
2743                                        0,
2744                                        MatRARt_MPIAIJ_MPIAIJ,
2745                                        0,
2746                                        0,
2747                                 /*139*/MatSetBlockSizes_MPIAIJ,
2748                                        0,
2749                                        0,
2750                                        MatFDColoringSetUp_MPIXAIJ,
2751                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2752                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2753 };
2754 
2755 /* ----------------------------------------------------------------------------------------*/
2756 
2757 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2758 {
2759   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2760   PetscErrorCode ierr;
2761 
2762   PetscFunctionBegin;
2763   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2764   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2769 {
2770   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2771   PetscErrorCode ierr;
2772 
2773   PetscFunctionBegin;
2774   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2775   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2776   PetscFunctionReturn(0);
2777 }
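/*
   A typical usage sketch for the store/retrieve pair above: MatStoreValues() requires that
   the nonzero structure be frozen first.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... overwrite the numerical values of mat, keeping the same nonzero pattern ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/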
2778 
2779 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2780 {
2781   Mat_MPIAIJ     *b;
2782   PetscErrorCode ierr;
2783   PetscMPIInt    size;
2784 
2785   PetscFunctionBegin;
2786   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2787   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2788   b = (Mat_MPIAIJ*)B->data;
2789 
2790 #if defined(PETSC_USE_CTABLE)
2791   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2792 #else
2793   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2794 #endif
2795   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2796   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2797   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2798 
2799   /* Because B will have been resized, we simply destroy it and create a new one each time */
2800   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2801   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2802   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2803   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2804   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2805   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2806   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2807 
2808   if (!B->preallocated) {
2809     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2810     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2811     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2812     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2813     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2814   }
2815 
2816   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2817   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2818   B->preallocated  = PETSC_TRUE;
2819   B->was_assembled = PETSC_FALSE;
2820   B->assembled     = PETSC_FALSE;
2821   PetscFunctionReturn(0);
2822 }
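/*
   A minimal preallocation sketch for the routine above: d_nz/o_nz give a per-row upper bound
   for the diagonal and off-diagonal blocks, while d_nnz/o_nnz (if not NULL) give exact
   per-row counts.

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,3,NULL,2,NULL);CHKERRQ(ierr);
*/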
2823 
2824 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2825 {
2826   Mat_MPIAIJ     *b;
2827   PetscErrorCode ierr;
2828 
2829   PetscFunctionBegin;
2830   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2831   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2832   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2833   b = (Mat_MPIAIJ*)B->data;
2834 
2835 #if defined(PETSC_USE_CTABLE)
2836   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2837 #else
2838   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2839 #endif
2840   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2841   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2842   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2843 
2844   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2845   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2846   B->preallocated  = PETSC_TRUE;
2847   B->was_assembled = PETSC_FALSE;
2848   B->assembled = PETSC_FALSE;
2849   PetscFunctionReturn(0);
2850 }
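/*
   A usage sketch for the reset above: after a matrix has been assembled, resetting the
   preallocation lets the caller reinsert values into the original preallocated pattern
   without destroying and recreating the matrix.

     ierr = MatResetPreallocation(B);CHKERRQ(ierr);
     ... call MatSetValues() again, then MatAssemblyBegin()/MatAssemblyEnd() ...
*/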
2851 
2852 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2853 {
2854   Mat            mat;
2855   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2856   PetscErrorCode ierr;
2857 
2858   PetscFunctionBegin;
2859   *newmat = 0;
2860   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2861   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2862   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2863   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2864   a       = (Mat_MPIAIJ*)mat->data;
2865 
2866   mat->factortype   = matin->factortype;
2867   mat->assembled    = PETSC_TRUE;
2868   mat->insertmode   = NOT_SET_VALUES;
2869   mat->preallocated = PETSC_TRUE;
2870 
2871   a->size         = oldmat->size;
2872   a->rank         = oldmat->rank;
2873   a->donotstash   = oldmat->donotstash;
2874   a->roworiented  = oldmat->roworiented;
2875   a->rowindices   = 0;
2876   a->rowvalues    = 0;
2877   a->getrowactive = PETSC_FALSE;
2878 
2879   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2880   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2881 
2882   if (oldmat->colmap) {
2883 #if defined(PETSC_USE_CTABLE)
2884     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2885 #else
2886     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2887     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2888     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2889 #endif
2890   } else a->colmap = 0;
2891   if (oldmat->garray) {
2892     PetscInt len;
2893     len  = oldmat->B->cmap->n;
2894     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2895     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2896     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2897   } else a->garray = 0;
2898 
2899   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2900   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2901   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2902   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2903 
2904   if (oldmat->Mvctx_mpi1) {
2905     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2906     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2907   }
2908 
2909   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2910   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2911   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2912   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2913   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2914   *newmat = mat;
2915   PetscFunctionReturn(0);
2916 }
2917 
2918 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2919 {
2920   PetscBool      isbinary, ishdf5;
2921   PetscErrorCode ierr;
2922 
2923   PetscFunctionBegin;
2924   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2925   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2926   /* force binary viewer to load .info file if it has not yet done so */
2927   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2928   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2929   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2930   if (isbinary) {
2931     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2932   } else if (ishdf5) {
2933 #if defined(PETSC_HAVE_HDF5)
2934     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2935 #else
2936     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2937 #endif
2938   } else {
2939     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2940   }
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2945 {
2946   PetscScalar    *vals,*svals;
2947   MPI_Comm       comm;
2948   PetscErrorCode ierr;
2949   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2950   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2951   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2952   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2953   PetscInt       cend,cstart,n,*rowners;
2954   int            fd;
2955   PetscInt       bs = newMat->rmap->bs;
2956 
2957   PetscFunctionBegin;
2958   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2959   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2960   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2961   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2962   if (!rank) {
2963     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2964     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2965     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2966   }
2967 
2968   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2969   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2970   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2971   if (bs < 0) bs = 1;
2972 
2973   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2974   M    = header[1]; N = header[2];
2975 
2976   /* If global sizes are set, check that they are consistent with those given in the file */
2977   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2978   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2979 
2980   /* determine ownership of all (block) rows */
2981   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2982   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2983   else m = newMat->rmap->n; /* Set by user */
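  /* A worked example of the PETSC_DECIDE branch above: with M = 10, bs = 2, and size = 3
     there are 5 block rows; ranks 0 and 1 each get 2 block rows (m = 4) and rank 2 gets
     1 block row (m = 2), which sums to M. */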
2984 
2985   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2986   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2987 
2988   /* First process needs enough room for the process with the most rows */
2989   if (!rank) {
2990     mmax = rowners[1];
2991     for (i=2; i<=size; i++) {
2992       mmax = PetscMax(mmax, rowners[i]);
2993     }
2994   } else mmax = -1;             /* unused, but compilers complain */
2995 
2996   rowners[0] = 0;
2997   for (i=2; i<=size; i++) {
2998     rowners[i] += rowners[i-1];
2999   }
3000   rstart = rowners[rank];
3001   rend   = rowners[rank+1];
3002 
3003   /* distribute row lengths to all processors */
3004   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3005   if (!rank) {
3006     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3007     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3008     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3009     for (j=0; j<m; j++) {
3010       procsnz[0] += ourlens[j];
3011     }
3012     for (i=1; i<size; i++) {
3013       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3014       /* calculate the number of nonzeros on each processor */
3015       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3016         procsnz[i] += rowlengths[j];
3017       }
3018       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3019     }
3020     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3021   } else {
3022     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3023   }
3024 
3025   if (!rank) {
3026     /* determine max buffer needed and allocate it */
3027     maxnz = 0;
3028     for (i=0; i<size; i++) {
3029       maxnz = PetscMax(maxnz,procsnz[i]);
3030     }
3031     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3032 
3033     /* read in my part of the matrix column indices  */
3034     nz   = procsnz[0];
3035     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3036     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3037 
3038     /* read in everyone else's and ship them off */
3039     for (i=1; i<size; i++) {
3040       nz   = procsnz[i];
3041       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3042       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3043     }
3044     ierr = PetscFree(cols);CHKERRQ(ierr);
3045   } else {
3046     /* determine buffer space needed for message */
3047     nz = 0;
3048     for (i=0; i<m; i++) {
3049       nz += ourlens[i];
3050     }
3051     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3052 
3053     /* receive message of column indices */
3054     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3055   }
3056 
3057   /* determine column ownership if matrix is not square */
3058   if (N != M) {
3059     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3060     else n = newMat->cmap->n;
3061     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3062     cstart = cend - n;
3063   } else {
3064     cstart = rstart;
3065     cend   = rend;
3066     n      = cend - cstart;
3067   }
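  /* A worked example of the scan above: with local n = {3,4,3} on three ranks, MPI_Scan
     produces cend = {3,7,10}, so cstart = cend - n = {0,3,7} gives each rank's first column. */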
3068 
3069   /* loop over local rows, determining the number of off-diagonal entries */
3070   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3071   jj   = 0;
3072   for (i=0; i<m; i++) {
3073     for (j=0; j<ourlens[i]; j++) {
3074       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3075       jj++;
3076     }
3077   }
3078 
3079   for (i=0; i<m; i++) {
3080     ourlens[i] -= offlens[i];
3081   }
3082   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3083 
3084   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3085 
3086   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3087 
3088   for (i=0; i<m; i++) {
3089     ourlens[i] += offlens[i];
3090   }
3091 
3092   if (!rank) {
3093     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3094 
3095     /* read in my part of the matrix numerical values  */
3096     nz   = procsnz[0];
3097     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3098 
3099     /* insert into matrix */
3100     jj      = rstart;
3101     smycols = mycols;
3102     svals   = vals;
3103     for (i=0; i<m; i++) {
3104       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3105       smycols += ourlens[i];
3106       svals   += ourlens[i];
3107       jj++;
3108     }
3109 
3110     /* read in other processes' values and ship them out */
3111     for (i=1; i<size; i++) {
3112       nz   = procsnz[i];
3113       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3114       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3115     }
3116     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3117   } else {
3118     /* receive numeric values */
3119     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3120 
3121     /* receive message of values */
3122     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3123 
3124     /* insert into matrix */
3125     jj      = rstart;
3126     smycols = mycols;
3127     svals   = vals;
3128     for (i=0; i<m; i++) {
3129       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3130       smycols += ourlens[i];
3131       svals   += ourlens[i];
3132       jj++;
3133     }
3134   }
3135   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3136   ierr = PetscFree(vals);CHKERRQ(ierr);
3137   ierr = PetscFree(mycols);CHKERRQ(ierr);
3138   ierr = PetscFree(rowners);CHKERRQ(ierr);
3139   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3140   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141   PetscFunctionReturn(0);
3142 }
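/*
   A loading sketch for the binary path above (assumes "matrix.dat" was written with MatView()
   on a binary viewer):

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/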
3143 
3144 /* Not scalable because of ISAllGather() unless getting all columns. */
3145 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3146 {
3147   PetscErrorCode ierr;
3148   IS             iscol_local;
3149   PetscBool      isstride;
3150   PetscMPIInt    lisstride=0,gisstride;
3151 
3152   PetscFunctionBegin;
3153   /* check if we are grabbing all columns */
3154   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3155 
3156   if (isstride) {
3157     PetscInt  start,len,mstart,mlen;
3158     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3159     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3160     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3161     if (mstart == start && mlen-mstart == len) lisstride = 1;
3162   }
3163 
3164   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3165   if (gisstride) {
3166     PetscInt N;
3167     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3168     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3169     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3170     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3171   } else {
3172     PetscInt cbs;
3173     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3174     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3175     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3176   }
3177 
3178   *isseq = iscol_local;
3179   PetscFunctionReturn(0);
3180 }
3181 
3182 /*
3183  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3184  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3185 
3186  Input Parameters:
3187    mat - matrix
3188    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3189            i.e., mat->rstart <= isrow[i] < mat->rend
3190    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3191            i.e., mat->cstart <= iscol[i] < mat->cend
3192  Output Parameters:
3193    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3194    iscol_o - sequential column index set for retrieving mat->B
3195    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3196  */
3197 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3198 {
3199   PetscErrorCode ierr;
3200   Vec            x,cmap;
3201   const PetscInt *is_idx;
3202   PetscScalar    *xarray,*cmaparray;
3203   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3204   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3205   Mat            B=a->B;
3206   Vec            lvec=a->lvec,lcmap;
3207   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3208   MPI_Comm       comm;
3209   VecScatter     Mvctx=a->Mvctx;
3210 
3211   PetscFunctionBegin;
3212   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3213   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3214 
3215   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3216   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3217   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3218   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3219   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3220 
3221   /* Get start indices */
3222   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3223   isstart -= ncols;
3224   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3225 
3226   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3227   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3228   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3229   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3230   for (i=0; i<ncols; i++) {
3231     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3232     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3233     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3234   }
3235   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3236   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3237   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3238 
3239   /* Get iscol_d */
3240   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3241   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3242   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3243 
3244   /* Get isrow_d */
3245   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3246   rstart = mat->rmap->rstart;
3247   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3248   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3249   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3250   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3251 
3252   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3253   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3254   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3255 
3256   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3257   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3258   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3259 
3260   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3261 
3262   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3263   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3264 
3265   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3266   /* off-process column indices */
3267   count = 0;
3268   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3269   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3270 
3271   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3272   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3273   for (i=0; i<Bn; i++) {
3274     if (PetscRealPart(xarray[i]) > -1.0) {
3275       idx[count]     = i;                   /* local column index in off-diagonal part B */
3276       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3277       count++;
3278     }
3279   }
3280   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3281   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3282 
3283   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3284   /* cannot ensure iscol_o has same blocksize as iscol! */
3285 
3286   ierr = PetscFree(idx);CHKERRQ(ierr);
3287   *garray = cmap1;
3288 
3289   ierr = VecDestroy(&x);CHKERRQ(ierr);
3290   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3291   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3292   PetscFunctionReturn(0);
3293 }
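/*
   A small worked example of the outputs above (illustrative values only): let process 0 own
   columns [0,4) and select iscol = {1,3} (isstart = 0), and let process 1 own columns [4,8)
   and select iscol = {5,6} (isstart = 2).  If process 0's off-diagonal block B has ghost
   columns {5,7}, the scatter marks ghost column 5 as selected and leaves 7 at -1, so on
   process 0 iscol_o = {0} (the local B column holding global column 5) and garray = {2},
   since global column 5 sits at position 2 of the concatenated iscol {1,3,5,6}.
*/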
3294 
3295 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3296 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3297 {
3298   PetscErrorCode ierr;
3299   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3300   Mat            M = NULL;
3301   MPI_Comm       comm;
3302   IS             iscol_d,isrow_d,iscol_o;
3303   Mat            Asub = NULL,Bsub = NULL;
3304   PetscInt       n;
3305 
3306   PetscFunctionBegin;
3307   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3308 
3309   if (call == MAT_REUSE_MATRIX) {
3310     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3311     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3312     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3313 
3314     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3315     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3316 
3317     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3318     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3319 
3320     /* Update diagonal and off-diagonal portions of submat */
3321     asub = (Mat_MPIAIJ*)(*submat)->data;
3322     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3323     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3324     if (n) {
3325       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3326     }
3327     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329 
3330   } else { /* call == MAT_INITIAL_MATRIX */
3331     const PetscInt *garray;
3332     PetscInt        BsubN;
3333 
3334     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3335     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3336 
3337     /* Create local submatrices Asub and Bsub */
3338     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3339     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3340 
3341     /* Create submatrix M */
3342     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3343 
3344     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3345     asub = (Mat_MPIAIJ*)M->data;
3346 
3347     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3348     n = asub->B->cmap->N;
3349     if (BsubN > n) {
3350       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3351       const PetscInt *idx;
3352       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3353       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3354 
3355       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3356       j = 0;
3357       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3358       for (i=0; i<n; i++) {
3359         if (j >= BsubN) break;
3360         while (subgarray[i] > garray[j]) j++;
3361 
3362         if (subgarray[i] == garray[j]) {
3363           idx_new[i] = idx[j++];
3364         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3365       }
3366       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3367 
3368       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3369       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3370 
3371     } else if (BsubN < n) {
3372       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
3373     }
3374 
3375     ierr = PetscFree(garray);CHKERRQ(ierr);
3376     *submat = M;
3377 
3378     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3379     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3380     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3381 
3382     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3383     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3384 
3385     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3386     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3387   }
3388   PetscFunctionReturn(0);
3389 }
3390 
3391 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3392 {
3393   PetscErrorCode ierr;
3394   IS             iscol_local=NULL,isrow_d;
3395   PetscInt       csize;
3396   PetscInt       n,i,j,start,end;
3397   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3398   MPI_Comm       comm;
3399 
3400   PetscFunctionBegin;
3401   /* If isrow has same processor distribution as mat,
3402      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3403   if (call == MAT_REUSE_MATRIX) {
3404     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3405     if (isrow_d) {
3406       sameRowDist  = PETSC_TRUE;
3407       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3408     } else {
3409       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3410       if (iscol_local) {
3411         sameRowDist  = PETSC_TRUE;
3412         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3413       }
3414     }
3415   } else {
3416     /* Check if isrow has same processor distribution as mat */
3417     sameDist[0] = PETSC_FALSE;
3418     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3419     if (!n) {
3420       sameDist[0] = PETSC_TRUE;
3421     } else {
3422       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3423       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3424       if (i >= start && j < end) {
3425         sameDist[0] = PETSC_TRUE;
3426       }
3427     }
3428 
3429     /* Check if iscol has same processor distribution as mat */
3430     sameDist[1] = PETSC_FALSE;
3431     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3432     if (!n) {
3433       sameDist[1] = PETSC_TRUE;
3434     } else {
3435       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3436       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3437       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3438     }
3439 
3440     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3441     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3442     sameRowDist = tsameDist[0];
3443   }
3444 
3445   if (sameRowDist) {
3446     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3447       /* isrow and iscol have same processor distribution as mat */
3448       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3449       PetscFunctionReturn(0);
3450     } else { /* sameRowDist */
3451       /* isrow has same processor distribution as mat */
3452       if (call == MAT_INITIAL_MATRIX) {
3453         PetscBool sorted;
3454         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3455         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3456         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3457         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3458 
3459         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3460         if (sorted) {
3461           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3462           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3463           PetscFunctionReturn(0);
3464         }
3465       } else { /* call == MAT_REUSE_MATRIX */
3466         IS    iscol_sub;
3467         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3468         if (iscol_sub) {
3469           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3470           PetscFunctionReturn(0);
3471         }
3472       }
3473     }
3474   }
3475 
3476   /* General case: iscol -> iscol_local which has global size of iscol */
3477   if (call == MAT_REUSE_MATRIX) {
3478     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3479     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3480   } else {
3481     if (!iscol_local) {
3482       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3483     }
3484   }
3485 
3486   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3487   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3488 
3489   if (call == MAT_INITIAL_MATRIX) {
3490     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3491     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3492   }
3493   PetscFunctionReturn(0);
3494 }
3495 
3496 /*@C
3497      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3498          and "off-diagonal" parts of the matrix in CSR format.
3499 
3500    Collective
3501 
3502    Input Parameters:
3503 +  comm - MPI communicator
3504 .  A - "diagonal" portion of matrix
3505 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3506 -  garray - global index of B columns
3507 
3508    Output Parameter:
3509 .   mat - the matrix, with input A as its local diagonal matrix

3510    Level: advanced
3511 
3512    Notes:
3513        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3514        A becomes part of the output mat, and B is destroyed by this routine; the user cannot use A or B afterwards.
3515 
3516 .seealso: MatCreateMPIAIJWithSplitArrays()
3517 @*/
3518 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3519 {
3520   PetscErrorCode ierr;
3521   Mat_MPIAIJ     *maij;
3522   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3523   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3524   PetscScalar    *oa=b->a;
3525   Mat            Bnew;
3526   PetscInt       m,n,N;
3527 
3528   PetscFunctionBegin;
3529   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3530   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3531   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3532   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3533   /* The check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3534   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3535 
3536   /* Get global columns of mat */
3537   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3538 
3539   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3540   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3541   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3542   maij = (Mat_MPIAIJ*)(*mat)->data;
3543 
3544   (*mat)->preallocated = PETSC_TRUE;
3545 
3546   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3547   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3548 
3549   /* Set A as diagonal portion of *mat */
3550   maij->A = A;
3551 
3552   nz = oi[m];
3553   for (i=0; i<nz; i++) {
3554     col   = oj[i];
3555     oj[i] = garray[col];
3556   }
3557 
3558   /* Set Bnew as off-diagonal portion of *mat */
3559   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3560   bnew        = (Mat_SeqAIJ*)Bnew->data;
3561   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3562   maij->B     = Bnew;
3563 
3564   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3565 
3566   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3567   b->free_a       = PETSC_FALSE;
3568   b->free_ij      = PETSC_FALSE;
3569   ierr = MatDestroy(&B);CHKERRQ(ierr);
3570 
3571   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3572   bnew->free_a       = PETSC_TRUE;
3573   bnew->free_ij      = PETSC_TRUE;
3574 
3575   /* condense columns of maij->B */
3576   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3577   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3578   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3579   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3580   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3581   PetscFunctionReturn(0);
3582 }
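/*
   A usage sketch for the routine above (A and B are sequential AIJ matrices already owned by
   this process; both are consumed by the call, so neither may be used afterwards):

     ierr = MatCreateMPIAIJWithSeqAIJ(comm,A,B,garray,&M);CHKERRQ(ierr);
*/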
3583 
3584 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3585 
3586 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3587 {
3588   PetscErrorCode ierr;
3589   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3590   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3591   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3592   Mat            M,Msub,B=a->B;
3593   MatScalar      *aa;
3594   Mat_SeqAIJ     *aij;
3595   PetscInt       *garray = a->garray,*colsub,Ncols;
3596   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3597   IS             iscol_sub,iscmap;
3598   const PetscInt *is_idx,*cmap;
3599   PetscBool      allcolumns=PETSC_FALSE;
3600   MPI_Comm       comm;
3601 
3602   PetscFunctionBegin;
3603   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3604 
3605   if (call == MAT_REUSE_MATRIX) {
3606     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3607     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3608     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3609 
3610     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3611     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3612 
3613     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3614     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3615 
3616     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3617 
3618   } else { /* call == MAT_INITIAL_MATRIX */
3619     PetscBool flg;
3620 
3621     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3622     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3623 
3624     /* (1) iscol -> nonscalable iscol_local */
3625     /* Check for special case: each processor gets entire matrix columns */
3626     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3627     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3628     if (allcolumns) {
3629       iscol_sub = iscol_local;
3630       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3631       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3632 
3633     } else {
3634       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3635       PetscInt *idx,*cmap1,k;
3636       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3637       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3638       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3639       count = 0;
3640       k     = 0;
3641       for (i=0; i<Ncols; i++) {
3642         j = is_idx[i];
3643         if (j >= cstart && j < cend) {
3644           /* diagonal part of mat */
3645           idx[count]     = j;
3646           cmap1[count++] = i; /* column index in submat */
3647         } else if (Bn) {
3648           /* off-diagonal part of mat */
3649           if (j == garray[k]) {
3650             idx[count]     = j;
3651             cmap1[count++] = i;  /* column index in submat */
3652           } else if (j > garray[k]) {
3653             while (j > garray[k] && k < Bn-1) k++;
3654             if (j == garray[k]) {
3655               idx[count]     = j;
3656               cmap1[count++] = i; /* column index in submat */
3657             }
3658           }
3659         }
3660       }
3661       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3662 
3663       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3664       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3665       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3666 
3667       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3668     }
3669 
3670     /* (3) Create sequential Msub */
3671     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3672   }
3673 
3674   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3675   aij  = (Mat_SeqAIJ*)(Msub)->data;
3676   ii   = aij->i;
3677   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3678 
3679   /*
3680       m - number of local rows
3681       Ncols - number of columns (same on all processors)
3682       rstart - first row in new global matrix generated
3683   */
3684   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3685 
3686   if (call == MAT_INITIAL_MATRIX) {
3687     /* (4) Create parallel newmat */
3688     PetscMPIInt    rank,size;
3689     PetscInt       csize;
3690 
3691     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3692     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3693 
3694     /*
3695         Determine the number of non-zeros in the diagonal and off-diagonal
3696         portions of the matrix in order to do correct preallocation
3697     */
3698 
3699     /* first get start and end of "diagonal" columns */
3700     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3701     if (csize == PETSC_DECIDE) {
3702       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3703       if (mglobal == Ncols) { /* square matrix */
3704         nlocal = m;
3705       } else {
3706         nlocal = Ncols/size + ((Ncols % size) > rank);
3707       }
3708     } else {
3709       nlocal = csize;
3710     }
3711     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3712     rstart = rend - nlocal;
3713     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3714 
3715     /* next, compute all the lengths */
3716     jj    = aij->j;
3717     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3718     olens = dlens + m;
3719     for (i=0; i<m; i++) {
3720       jend = ii[i+1] - ii[i];
3721       olen = 0;
3722       dlen = 0;
3723       for (j=0; j<jend; j++) {
3724         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3725         else dlen++;
3726         jj++;
3727       }
3728       olens[i] = olen;
3729       dlens[i] = dlen;
3730     }
3731 
3732     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3733     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3734 
3735     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3736     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3737     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3738     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3739     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3740     ierr = PetscFree(dlens);CHKERRQ(ierr);
3741 
3742   } else { /* call == MAT_REUSE_MATRIX */
3743     M    = *newmat;
3744     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3745     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3746     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3747     /*
3748          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3749        rather than the slower MatSetValues().
3750     */
3751     M->was_assembled = PETSC_TRUE;
3752     M->assembled     = PETSC_FALSE;
3753   }
3754 
3755   /* (5) Set values of Msub to *newmat */
3756   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3757   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3758 
3759   jj   = aij->j;
3760   aa   = aij->a;
3761   for (i=0; i<m; i++) {
3762     row = rstart + i;
3763     nz  = ii[i+1] - ii[i];
3764     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3765     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3766     jj += nz; aa += nz;
3767   }
3768   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3769 
3770   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3771   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772 
3773   ierr = PetscFree(colsub);CHKERRQ(ierr);
3774 
3775   /* save Msub, iscol_sub and iscmap used in processor for next request */
3776   if (call ==  MAT_INITIAL_MATRIX) {
3777     *newmat = M;
3778     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3779     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3780 
3781     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3782     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3783 
3784     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3785     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3786 
3787     if (iscol_local) {
3788       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3789       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3790     }
3791   }
3792   PetscFunctionReturn(0);
3793 }
3794 
3795 /*
3796     Not great since it makes two copies of the submatrix: first a SeqAIJ
3797   locally, then the end result by concatenating the local matrices.
3798   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3799 
3800   Note: This requires a sequential iscol with all indices.
3801 */
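
/*
  A hedged usage sketch (A, nloc and first are illustrative placeholders, not
  part of this routine): callers normally reach this function through the
  public MatCreateSubMatrix() interface rather than calling it directly.

     IS  isrow,iscol;
     Mat sub;

     ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&isrow);
     ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&iscol);
     MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
*/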
3802 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3803 {
3804   PetscErrorCode ierr;
3805   PetscMPIInt    rank,size;
3806   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3807   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3808   Mat            M,Mreuse;
3809   MatScalar      *aa,*vwork;
3810   MPI_Comm       comm;
3811   Mat_SeqAIJ     *aij;
3812   PetscBool      colflag,allcolumns=PETSC_FALSE;
3813 
3814   PetscFunctionBegin;
3815   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3816   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3817   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3818 
3819   /* Check for special case: each processor gets all of the matrix columns */
3820   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3821   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3822   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3823 
3824   if (call ==  MAT_REUSE_MATRIX) {
3825     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3826     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3827     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3828   } else {
3829     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3830   }
3831 
3832   /*
3833       m - number of local rows
3834       n - number of columns (same on all processors)
3835       rstart - first row in new global matrix generated
3836   */
3837   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3838   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3839   if (call == MAT_INITIAL_MATRIX) {
3840     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3841     ii  = aij->i;
3842     jj  = aij->j;
3843 
3844     /*
3845         Determine the number of non-zeros in the diagonal and off-diagonal
3846         portions of the matrix in order to do correct preallocation
3847     */
3848 
3849     /* first get start and end of "diagonal" columns */
3850     if (csize == PETSC_DECIDE) {
3851       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3852       if (mglobal == n) { /* square matrix */
3853         nlocal = m;
3854       } else {
3855         nlocal = n/size + ((n % size) > rank);
3856       }
3857     } else {
3858       nlocal = csize;
3859     }
3860     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3861     rstart = rend - nlocal;
3862     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3863 
3864     /* next, compute all the lengths */
3865     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3866     olens = dlens + m;
3867     for (i=0; i<m; i++) {
3868       jend = ii[i+1] - ii[i];
3869       olen = 0;
3870       dlen = 0;
3871       for (j=0; j<jend; j++) {
3872         if (*jj < rstart || *jj >= rend) olen++;
3873         else dlen++;
3874         jj++;
3875       }
3876       olens[i] = olen;
3877       dlens[i] = dlen;
3878     }
3879     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3880     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3881     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3882     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3883     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3884     ierr = PetscFree(dlens);CHKERRQ(ierr);
3885   } else {
3886     PetscInt ml,nl;
3887 
3888     M    = *newmat;
3889     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3890     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3891     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3892     /*
3893          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3894        rather than the slower MatSetValues().
3895     */
3896     M->was_assembled = PETSC_TRUE;
3897     M->assembled     = PETSC_FALSE;
3898   }
3899   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3900   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3901   ii   = aij->i;
3902   jj   = aij->j;
3903   aa   = aij->a;
3904   for (i=0; i<m; i++) {
3905     row   = rstart + i;
3906     nz    = ii[i+1] - ii[i];
3907     cwork = jj;     jj += nz;
3908     vwork = aa;     aa += nz;
3909     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3910   }
3911 
3912   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3913   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3914   *newmat = M;
3915 
3916   /* save submatrix used in processor for next request */
3917   if (call ==  MAT_INITIAL_MATRIX) {
3918     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3919     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3920   }
3921   PetscFunctionReturn(0);
3922 }
3923 
3924 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3925 {
3926   PetscInt       m,cstart, cend,j,nnz,i,d;
3927   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3928   const PetscInt *JJ;
3929   PetscErrorCode ierr;
3930   PetscBool      nooffprocentries;
3931 
3932   PetscFunctionBegin;
3933   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3934 
3935   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3936   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3937   m      = B->rmap->n;
3938   cstart = B->cmap->rstart;
3939   cend   = B->cmap->rend;
3940   rstart = B->rmap->rstart;
3941 
3942   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3943 
3944 #if defined(PETSC_USE_DEBUG)
3945   for (i=0; i<m; i++) {
3946     nnz = Ii[i+1]- Ii[i];
3947     JJ  = J + Ii[i];
3948     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3949     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3950     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3951   }
3952 #endif
3953 
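  /* count, for each local row, how many column indices fall inside the diagonal
     block [cstart,cend) (d_nnz) and how many fall outside it (o_nnz) */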
3954   for (i=0; i<m; i++) {
3955     nnz     = Ii[i+1]- Ii[i];
3956     JJ      = J + Ii[i];
3957     nnz_max = PetscMax(nnz_max,nnz);
3958     d       = 0;
3959     for (j=0; j<nnz; j++) {
3960       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3961     }
3962     d_nnz[i] = d;
3963     o_nnz[i] = nnz - d;
3964   }
3965   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3966   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3967 
3968   for (i=0; i<m; i++) {
3969     ii   = i + rstart;
3970     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3971   }
3972   nooffprocentries    = B->nooffprocentries;
3973   B->nooffprocentries = PETSC_TRUE;
3974   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976   B->nooffprocentries = nooffprocentries;
3977 
3978   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3979   PetscFunctionReturn(0);
3980 }
3981 
3982 /*@
3983    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3984    (the default parallel PETSc format).
3985 
3986    Collective
3987 
3988    Input Parameters:
3989 +  B - the matrix
3990 .  i - the indices into j for the start of each local row (starts with zero)
3991 .  j - the column indices for each local row (starts with zero)
3992 -  v - optional values in the matrix
3993 
3994    Level: developer
3995 
3996    Notes:
3997        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3998      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3999      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4000 
4001       The i and j indices are 0 based, and the i indices are offsets into the local j array.
4002 
4003       The format used for the sparse matrix input is equivalent to a
4004     row-major ordering, i.e. for the following matrix, the input data expected is
4005     as shown:
4006 
4007 $        1 0 0
4008 $        2 0 3     P0
4009 $       -------
4010 $        4 5 6     P1
4011 $
4012 $     Process0 [P0]: rows_owned=[0,1]
4013 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4014 $        j =  {0,0,2}  [size = 3]
4015 $        v =  {1,2,3}  [size = 3]
4016 $
4017 $     Process1 [P1]: rows_owned=[2]
4018 $        i =  {0,3}    [size = nrow+1  = 1+1]
4019 $        j =  {0,1,2}  [size = 3]
4020 $        v =  {4,5,6}  [size = 3]
4021 
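   A hedged calling sketch for process 0 of the example above ('B' and the
   array names are illustrative, not part of the API):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
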
4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4023           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4024 @*/
4025 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4026 {
4027   PetscErrorCode ierr;
4028 
4029   PetscFunctionBegin;
4030   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4031   PetscFunctionReturn(0);
4032 }
4033 
4034 /*@C
4035    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4036    (the default parallel PETSc format).  For good matrix assembly performance
4037    the user should preallocate the matrix storage by setting the parameters
4038    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4039    performance can be increased by more than a factor of 50.
4040 
4041    Collective
4042 
4043    Input Parameters:
4044 +  B - the matrix
4045 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4046            (same value is used for all local rows)
4047 .  d_nnz - array containing the number of nonzeros in the various rows of the
4048            DIAGONAL portion of the local submatrix (possibly different for each row)
4049            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4050            The size of this array is equal to the number of local rows, i.e. 'm'.
4051            For matrices that will be factored, you must leave room for (and set)
4052            the diagonal entry even if it is zero.
4053 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4054            submatrix (same value is used for all local rows).
4055 -  o_nnz - array containing the number of nonzeros in the various rows of the
4056            OFF-DIAGONAL portion of the local submatrix (possibly different for
4057            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4058            structure. The size of this array is equal to the number
4059            of local rows, i.e. 'm'.
4060 
4061    If the *_nnz parameter is given then the *_nz parameter is ignored
4062 
4063    The AIJ format (also called the Yale sparse matrix format or
4064    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4065    storage.  The stored row and column indices begin with zero.
4066    See Users-Manual: ch_mat for details.
4067 
4068    The parallel matrix is partitioned such that the first m0 rows belong to
4069    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4070    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4071 
4072    The DIAGONAL portion of the local submatrix of a processor can be defined
4073    as the submatrix which is obtained by extracting the part corresponding to
4074    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4075    first row that belongs to the processor, r2 is the last row belonging to
4076    this processor, and c1-c2 is the range of indices of the local part of a
4077    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4078    common case of a square matrix, the row and column ranges are the same and
4079    the DIAGONAL part is also square. The remaining portion of the local
4080    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4081 
4082    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4083    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4084    You can call MatGetInfo() to get information on how effective the preallocation was;
4085    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4086    You can also run with the option -info and look for messages with the string
4087    malloc in them to see if additional memory allocation was needed.
4088 
4089    Example usage:
4090 
4091    Consider the following 8x8 matrix with 34 non-zero values, that is
4092    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4093    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4094    as follows:
4095 
4096 .vb
4097             1  2  0  |  0  3  0  |  0  4
4098     Proc0   0  5  6  |  7  0  0  |  8  0
4099             9  0 10  | 11  0  0  | 12  0
4100     -------------------------------------
4101            13  0 14  | 15 16 17  |  0  0
4102     Proc1   0 18  0  | 19 20 21  |  0  0
4103             0  0  0  | 22 23  0  | 24  0
4104     -------------------------------------
4105     Proc2  25 26 27  |  0  0 28  | 29  0
4106            30  0  0  | 31 32 33  |  0 34
4107 .ve
4108 
4109    This can be represented as a collection of submatrices as:
4110 
4111 .vb
4112       A B C
4113       D E F
4114       G H I
4115 .ve
4116 
4117    Where the submatrices A,B,C are owned by proc0, D,E,F are
4118    owned by proc1, G,H,I are owned by proc2.
4119 
4120    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4121    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4122    The 'M','N' parameters are 8,8, and have the same values on all procs.
4123 
4124    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4125    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4126    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4127    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4128    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4129    matrix and [DF] as another SeqAIJ matrix.
4130 
4131    When d_nz, o_nz parameters are specified, d_nz storage elements are
4132    allocated for every row of the local diagonal submatrix, and o_nz
4133    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4134    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
4135    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4136    In this case, the values of d_nz,o_nz are:
4137 .vb
4138      proc0 : dnz = 2, o_nz = 2
4139      proc1 : dnz = 3, o_nz = 2
4140      proc2 : dnz = 1, o_nz = 4
4141 .ve
4142    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4143    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4144    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4145    34 values.
4146 
4147    When d_nnz, o_nnz parameters are specified, the storage is specified
4148    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4149    In the above case the values for d_nnz,o_nnz are:
4150 .vb
4151      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4152      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4153      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4154 .ve
4155    Here the space allocated is the sum of all the above values, i.e. 34, and
4156    hence pre-allocation is perfect.
4157 
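   As a hedged sketch, process 0 of the example above could preallocate with
   the values from the table ('B' is an already created MATMPIAIJ matrix with
   3 local rows):

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
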
4158    Level: intermediate
4159 
4160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4161           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4162 @*/
4163 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4164 {
4165   PetscErrorCode ierr;
4166 
4167   PetscFunctionBegin;
4168   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4169   PetscValidType(B,1);
4170   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4171   PetscFunctionReturn(0);
4172 }
4173 
4174 /*@
4175      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4176          in standard CSR format.
4177 
4178    Collective
4179 
4180    Input Parameters:
4181 +  comm - MPI communicator
4182 .  m - number of local rows (Cannot be PETSC_DECIDE)
4183 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
4184        This value should be the same as the local size used in creating the
4185        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4186 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4187 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4188 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4189 .   j - column indices
4190 -   a - matrix values
4191 
4192    Output Parameter:
4193 .   mat - the matrix
4194 
4195    Level: intermediate
4196 
4197    Notes:
4198        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4199      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4200      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4201 
4202        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4203       The i and j indices are 0 based, and the i indices are offsets into the local j array.
4204 
4205       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4206 
4207       The format used for the sparse matrix input is equivalent to a
4208    row-major ordering, i.e. for the following matrix, the input data expected is
4209    as shown:
4210 
4211 $        2 0 3     P0
4212 $       -------
4213 $        4 5 6     P1
4214 $
4215 $     Process0 [P0]: rows_owned=[0,1]
4216 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4217 $        j =  {0,0,2}  [size = 3]
4218 $        v =  {1,2,3}  [size = 3]
4219 $
4220 $     Process1 [P1]: rows_owned=[2]
4221 $        i =  {0,3}    [size = nrow+1  = 1+1]
4222 $        j =  {0,1,2}  [size = 3]
4223 $        v =  {4,5,6}  [size = 3]
4224 
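   A hedged sketch of the corresponding call on process 0 ('A' and the array
   names are illustrative):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
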
4225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4226           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4227 @*/
4228 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4229 {
4230   PetscErrorCode ierr;
4231 
4232   PetscFunctionBegin;
4233   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4234   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4235   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4236   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4237   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4238   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4239   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4240   PetscFunctionReturn(0);
4241 }
4242 
4243 /*@
4244      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4245          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4246 
4247    Collective
4248 
4249    Input Parameters:
4250 +  mat - the matrix
4251 .  m - number of local rows (Cannot be PETSC_DECIDE)
4252 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
4253        This value should be the same as the local size used in creating the
4254        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4255 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4256 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4257 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4258 .  J - column indices
4259 -  v - matrix values
4260 
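   Notes:
   A hedged usage sketch, assuming A was created with MatCreateMPIAIJWithArrays()
   from the same Ii and J arrays and only the numerical values in vnew differ:

.vb
     MatUpdateMPIAIJWithArrays(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE,Ii,J,vnew);
.ve
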
4261    Level: intermediate
4262 
4263 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4264           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4265 @*/
4266 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4267 {
4268   PetscErrorCode ierr;
4269   PetscInt       cstart,nnz,i,j;
4270   PetscInt       *ld;
4271   PetscBool      nooffprocentries;
4272   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4273   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4274   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4275   const PetscInt *Adi = Ad->i;
4276   PetscInt       ldi,Iii,md;
4277 
4278   PetscFunctionBegin;
4279   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4280   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4281   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4282   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4283 
4284   cstart = mat->cmap->rstart;
4285   if (!Aij->ld) {
4286     /* count number of entries below block diagonal */
4287     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4288     Aij->ld = ld;
4289     for (i=0; i<m; i++) {
4290       nnz  = Ii[i+1]- Ii[i];
4291       j     = 0;
4292     while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz first so J[j] is never read past the end of the row */
4293       J    += nnz;
4294       ld[i] = j;
4295     }
4296   } else {
4297     ld = Aij->ld;
4298   }
4299 
4300   for (i=0; i<m; i++) {
4301     nnz  = Ii[i+1]- Ii[i];
4302     Iii  = Ii[i];
4303     ldi  = ld[i];
4304     md   = Adi[i+1]-Adi[i];
4305     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4306     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4307     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4308     ad  += md;
4309     ao  += nnz - md;
4310   }
4311   nooffprocentries      = mat->nooffprocentries;
4312   mat->nooffprocentries = PETSC_TRUE;
4313   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4314   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4315   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4316   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4317   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4318   mat->nooffprocentries = nooffprocentries;
4319   PetscFunctionReturn(0);
4320 }
4321 
4322 /*@C
4323    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4324    (the default parallel PETSc format).  For good matrix assembly performance
4325    the user should preallocate the matrix storage by setting the parameters
4326    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4327    performance can be increased by more than a factor of 50.
4328 
4329    Collective
4330 
4331    Input Parameters:
4332 +  comm - MPI communicator
4333 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
4334            This value should be the same as the local size used in creating the
4335            y vector for the matrix-vector product y = Ax.
4336 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
4337        This value should be the same as the local size used in creating the
4338        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4339 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4340 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4341 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4342            (same value is used for all local rows)
4343 .  d_nnz - array containing the number of nonzeros in the various rows of the
4344            DIAGONAL portion of the local submatrix (possibly different for each row)
4345            or NULL, if d_nz is used to specify the nonzero structure.
4346            The size of this array is equal to the number of local rows, i.e. 'm'.
4347 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4348            submatrix (same value is used for all local rows).
4349 -  o_nnz - array containing the number of nonzeros in the various rows of the
4350            OFF-DIAGONAL portion of the local submatrix (possibly different for
4351            each row) or NULL, if o_nz is used to specify the nonzero
4352            structure. The size of this array is equal to the number
4353            of local rows, i.e. 'm'.
4354 
4355    Output Parameter:
4356 .  A - the matrix
4357 
4358    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4359    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4360    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4361 
4362    Notes:
4363    If the *_nnz parameter is given then the *_nz parameter is ignored
4364 
4365    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4366    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4367    storage requirements for this matrix.
4368 
4369    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4370    processor then it must be used on all processors that share the object for
4371    that argument.
4372 
4373    The user MUST specify either the local or global matrix dimensions
4374    (possibly both).
4375 
4376    The parallel matrix is partitioned across processors such that the
4377    first m0 rows belong to process 0, the next m1 rows belong to
4378    process 1, the next m2 rows belong to process 2, etc., where
4379    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4380    values corresponding to an [m x N] submatrix.
4381 
4382    The columns are logically partitioned with the n0 columns belonging
4383    to 0th partition, the next n1 columns belonging to the next
4384    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4385 
4386    The DIAGONAL portion of the local submatrix on any given processor
4387    is the submatrix corresponding to the rows and columns m,n
4388    corresponding to the given processor, i.e. the diagonal matrix on
4389    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4390    etc. The remaining portion of the local submatrix [m x (N-n)]
4391    constitutes the OFF-DIAGONAL portion. The example below better
4392    illustrates this concept.
4393 
4394    For a square global matrix we define each processor's diagonal portion
4395    to be its local rows and the corresponding columns (a square submatrix);
4396    each processor's off-diagonal portion encompasses the remainder of the
4397    local matrix (a rectangular submatrix).
4398 
4399    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4400 
4401    When calling this routine with a single process communicator, a matrix of
4402    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4403    type of communicator, use the construction mechanism
4404 .vb
4405      MatCreate(...,&A);
4406      MatSetType(A,MATMPIAIJ);
4407      MatSetSizes(A, m,n,M,N);
4408      MatMPIAIJSetPreallocation(A,...);
4409 .ve
4410 
4413    By default, this format uses inodes (identical nodes) when possible.
4414    We search for consecutive rows with the same nonzero structure, thereby
4415    reusing matrix information to achieve increased efficiency.
4416 
4417    Options Database Keys:
4418 +  -mat_no_inode  - Do not use inodes
4419 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4420 
4421 
4423    Example usage:
4424 
4425    Consider the following 8x8 matrix with 34 non-zero values, that is
4426    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4427    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4428    as follows
4429 
4430 .vb
4431             1  2  0  |  0  3  0  |  0  4
4432     Proc0   0  5  6  |  7  0  0  |  8  0
4433             9  0 10  | 11  0  0  | 12  0
4434     -------------------------------------
4435            13  0 14  | 15 16 17  |  0  0
4436     Proc1   0 18  0  | 19 20 21  |  0  0
4437             0  0  0  | 22 23  0  | 24  0
4438     -------------------------------------
4439     Proc2  25 26 27  |  0  0 28  | 29  0
4440            30  0  0  | 31 32 33  |  0 34
4441 .ve
4442 
4443    This can be represented as a collection of submatrices as
4444 
4445 .vb
4446       A B C
4447       D E F
4448       G H I
4449 .ve
4450 
4451    Where the submatrices A,B,C are owned by proc0, D,E,F are
4452    owned by proc1, G,H,I are owned by proc2.
4453 
4454    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4455    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4456    The 'M','N' parameters are 8,8, and have the same values on all procs.
4457 
4458    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4459    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4460    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4461    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4462    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4463    matrix and [DF] as another SeqAIJ matrix.
4464 
4465    When d_nz, o_nz parameters are specified, d_nz storage elements are
4466    allocated for every row of the local diagonal submatrix, and o_nz
4467    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4468    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
4469    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4470    In this case, the values of d_nz,o_nz are
4471 .vb
4472      proc0 : dnz = 2, o_nz = 2
4473      proc1 : dnz = 3, o_nz = 2
4474      proc2 : dnz = 1, o_nz = 4
4475 .ve
4476    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4477    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4478    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4479    34 values.
4480 
4481    When d_nnz, o_nnz parameters are specified, the storage is specified
4482    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4483    In the above case the values for d_nnz,o_nnz are
4484 .vb
4485      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4486      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4487      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4488 .ve
4489    Here the space allocated is the sum of all the above values, i.e. 34, and
4490    hence pre-allocation is perfect.
4491 
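   As a hedged sketch, process 0 of the example above could create the matrix
   with the d_nnz/o_nnz values from the table, then insert the entries with
   MatSetValues() and assemble:

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
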
4492    Level: intermediate
4493 
4494 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4495           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4496 @*/
4497 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4498 {
4499   PetscErrorCode ierr;
4500   PetscMPIInt    size;
4501 
4502   PetscFunctionBegin;
4503   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4504   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4505   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4506   if (size > 1) {
4507     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4508     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4509   } else {
4510     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4511     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4512   }
4513   PetscFunctionReturn(0);
4514 }
4515 
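/*
  A hedged usage sketch for MatMPIAIJGetSeqAIJ() below: retrieve the diagonal
  (Ad) and off-diagonal (Ao) blocks of an assembled MATMPIAIJ matrix A, plus
  the map from Ao's local column numbers to global columns. The blocks are
  internal references and must not be destroyed by the caller.

     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
*/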
4516 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4517 {
4518   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4519   PetscBool      flg;
4520   PetscErrorCode ierr;
4521 
4522   PetscFunctionBegin;
4523   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4524   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4525   if (Ad)     *Ad     = a->A;
4526   if (Ao)     *Ao     = a->B;
4527   if (colmap) *colmap = a->garray;
4528   PetscFunctionReturn(0);
4529 }
4530 
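/*
  A hedged sketch of a typical call to the routine below: stack each process's
  sequential matrix inmat (all with the same global column count) into one
  parallel matrix.

     Mat outmat;

     MatCreateMPIMatConcatenateSeqMat_MPIAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
*/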
4531 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4532 {
4533   PetscErrorCode ierr;
4534   PetscInt       m,N,i,rstart,nnz,Ii;
4535   PetscInt       *indx;
4536   PetscScalar    *values;
4537 
4538   PetscFunctionBegin;
4539   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4540   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4541     PetscInt       *dnz,*onz,sum,bs,cbs;
4542 
4543     if (n == PETSC_DECIDE) {
4544       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4545     }
4546     /* Check sum(n) = N */
4547     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4548     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4549 
4550     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4551     rstart -= m;
4552 
4553     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4554     for (i=0; i<m; i++) {
4555       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4556       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4557       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4558     }
4559 
4560     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4561     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4562     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4563     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4564     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4565     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4566     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4567     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4568   }
4569 
4570   /* numeric phase */
4571   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4572   for (i=0; i<m; i++) {
4573     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4574     Ii   = i + rstart;
4575     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4576     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4577   }
4578   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4579   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4580   PetscFunctionReturn(0);
4581 }
4582 
4583 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4584 {
4585   PetscErrorCode    ierr;
4586   PetscMPIInt       rank;
4587   PetscInt          m,N,i,rstart,nnz;
4588   size_t            len;
4589   const PetscInt    *indx;
4590   PetscViewer       out;
4591   char              *name;
4592   Mat               B;
4593   const PetscScalar *values;
4594 
4595   PetscFunctionBegin;
4596   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4597   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4598   /* Should this be the type of the diagonal block of A? */
4599   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4600   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4601   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4602   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4603   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4604   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4605   for (i=0; i<m; i++) {
4606     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4607     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4608     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4609   }
4610   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4611   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612 
4613   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4614   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4615   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4616   sprintf(name,"%s.%d",outfile,rank);
4617   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4618   ierr = PetscFree(name);CHKERRQ(ierr);
4619   ierr = MatView(B,out);CHKERRQ(ierr);
4620   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4621   ierr = MatDestroy(&B);CHKERRQ(ierr);
4622   PetscFunctionReturn(0);
4623 }
4624 
4625 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4626 {
4627   PetscErrorCode      ierr;
4628   Mat_Merge_SeqsToMPI *merge;
4629   PetscContainer      container;
4630 
4631   PetscFunctionBegin;
4632   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4633   if (container) {
4634     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4635     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4636     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4637     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4638     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4639     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4640     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4641     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4642     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4643     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4644     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4645     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4646     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4647     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4648     ierr = PetscFree(merge);CHKERRQ(ierr);
4649     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4650   }
4651   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4652   PetscFunctionReturn(0);
4653 }
4654 
4655 #include <../src/mat/utils/freespace.h>
4656 #include <petscbt.h>
4657 
4658 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4659 {
4660   PetscErrorCode      ierr;
4661   MPI_Comm            comm;
4662   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4663   PetscMPIInt         size,rank,taga,*len_s;
4664   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4665   PetscInt            proc,m;
4666   PetscInt            **buf_ri,**buf_rj;
4667   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4668   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4669   MPI_Request         *s_waits,*r_waits;
4670   MPI_Status          *status;
4671   MatScalar           *aa=a->a;
4672   MatScalar           **abuf_r,*ba_i;
4673   Mat_Merge_SeqsToMPI *merge;
4674   PetscContainer      container;
4675 
4676   PetscFunctionBegin;
4677   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4678   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4679 
4680   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4681   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4682 
4683   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4684   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4685 
4686   bi     = merge->bi;
4687   bj     = merge->bj;
4688   buf_ri = merge->buf_ri;
4689   buf_rj = merge->buf_rj;
4690 
4691   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4692   owners = merge->rowmap->range;
4693   len_s  = merge->len_s;
4694 
4695   /* send and recv matrix values */
4696   /*-----------------------------*/
4697   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4698   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4699 
4700   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4701   for (proc=0,k=0; proc<size; proc++) {
4702     if (!len_s[proc]) continue;
4703     i    = owners[proc];
4704     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4705     k++;
4706   }
4707 
4708   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4709   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4710   ierr = PetscFree(status);CHKERRQ(ierr);
4711 
4712   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4713   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4714 
4715   /* insert mat values of mpimat */
4716   /*----------------------------*/
4717   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4718   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4719 
4720   for (k=0; k<merge->nrecv; k++) {
4721     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4722     nrows       = *(buf_ri_k[k]);
4723     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4724     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4725   }
4726 
4727   /* set values of ba */
4728   m = merge->rowmap->n;
4729   for (i=0; i<m; i++) {
4730     arow = owners[rank] + i;
4731     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4732     bnzi = bi[i+1] - bi[i];
4733     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4734 
4735     /* add local non-zero vals of this proc's seqmat into ba */
4736     anzi   = ai[arow+1] - ai[arow];
4737     aj     = a->j + ai[arow];
4738     aa     = a->a + ai[arow];
4739     nextaj = 0;
4740     for (j=0; nextaj<anzi; j++) {
4741       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4742         ba_i[j] += aa[nextaj++];
4743       }
4744     }
4745 
4746     /* add received vals into ba */
4747     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4748       /* i-th row */
4749       if (i == *nextrow[k]) {
4750         anzi   = *(nextai[k]+1) - *nextai[k];
4751         aj     = buf_rj[k] + *(nextai[k]);
4752         aa     = abuf_r[k] + *(nextai[k]);
4753         nextaj = 0;
4754         for (j=0; nextaj<anzi; j++) {
4755           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4756             ba_i[j] += aa[nextaj++];
4757           }
4758         }
4759         nextrow[k]++; nextai[k]++;
4760       }
4761     }
4762     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4763   }
4764   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4765   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4766 
4767   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4768   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4769   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4770   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4771   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4772   PetscFunctionReturn(0);
4773 }
4774 
4775 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4776 {
4777   PetscErrorCode      ierr;
4778   Mat                 B_mpi;
4779   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4780   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4781   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4782   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4783   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4784   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4785   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4786   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4787   MPI_Status          *status;
4788   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4789   PetscBT             lnkbt;
4790   Mat_Merge_SeqsToMPI *merge;
4791   PetscContainer      container;
4792 
4793   PetscFunctionBegin;
4794   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4795 
4796   /* make sure it is a PETSc comm */
4797   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4798   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4799   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4800 
4801   ierr = PetscNew(&merge);CHKERRQ(ierr);
4802   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4803 
4804   /* determine row ownership */
4805   /*---------------------------------------------------------*/
4806   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4807   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4808   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4809   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4810   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4811   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4812   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4813 
4814   m      = merge->rowmap->n;
4815   owners = merge->rowmap->range;
4816 
4817   /* determine the number of messages to send, their lengths */
4818   /*---------------------------------------------------------*/
4819   len_s = merge->len_s;
4820 
4821   len          = 0; /* length of buf_si[] */
4822   merge->nsend = 0;
4823   for (proc=0; proc<size; proc++) {
4824     len_si[proc] = 0;
4825     if (proc == rank) {
4826       len_s[proc] = 0;
4827     } else {
4828       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4829       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
4830     }
4831     if (len_s[proc]) {
4832       merge->nsend++;
4833       nrows = 0;
4834       for (i=owners[proc]; i<owners[proc+1]; i++) {
4835         if (ai[i+1] > ai[i]) nrows++;
4836       }
4837       len_si[proc] = 2*(nrows+1);
4838       len         += len_si[proc];
4839     }
4840   }
4841 
4842   /* determine the number and length of messages to receive for ij-structure */
4843   /*-------------------------------------------------------------------------*/
4844   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4845   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4846 
4847   /* post the Irecv of j-structure */
4848   /*-------------------------------*/
4849   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4850   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4851 
4852   /* post the Isend of j-structure */
4853   /*--------------------------------*/
4854   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4855 
4856   for (proc=0, k=0; proc<size; proc++) {
4857     if (!len_s[proc]) continue;
4858     i    = owners[proc];
4859     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4860     k++;
4861   }
4862 
4863   /* receives and sends of j-structure are complete */
4864   /*------------------------------------------------*/
4865   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4866   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4867 
4868   /* send and recv i-structure */
4869   /*---------------------------*/
4870   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4871   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4872 
4873   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4874   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4875   for (proc=0,k=0; proc<size; proc++) {
4876     if (!len_s[proc]) continue;
4877     /* form outgoing message for i-structure:
4878          buf_si[0]:                 nrows to be sent
4879                [1:nrows]:           row index (global)
4880                [nrows+1:2*nrows+1]: i-structure index
4881     */
4882     /*-------------------------------------------*/
4883     nrows       = len_si[proc]/2 - 1;
4884     buf_si_i    = buf_si + nrows+1;
4885     buf_si[0]   = nrows;
4886     buf_si_i[0] = 0;
4887     nrows       = 0;
4888     for (i=owners[proc]; i<owners[proc+1]; i++) {
4889       anzi = ai[i+1] - ai[i];
4890       if (anzi) {
4891         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4892         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4893         nrows++;
4894       }
4895     }
4896     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4897     k++;
4898     buf_si += len_si[proc];
4899   }
4900 
4901   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4902   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4903 
4904   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4905   for (i=0; i<merge->nrecv; i++) {
4906     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4907   }
4908 
4909   ierr = PetscFree(len_si);CHKERRQ(ierr);
4910   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4911   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4912   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4913   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4914   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4915   ierr = PetscFree(status);CHKERRQ(ierr);
4916 
4917   /* compute a local seq matrix in each processor */
4918   /*----------------------------------------------*/
4919   /* allocate bi array and free space for accumulating nonzero column info */
4920   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4921   bi[0] = 0;
4922 
4923   /* create and initialize a linked list */
4924   nlnk = N+1;
4925   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4926 
4927   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4928   len  = ai[owners[rank+1]] - ai[owners[rank]];
4929   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4930 
4931   current_space = free_space;
4932 
4933   /* determine symbolic info for each local row */
4934   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4935 
4936   for (k=0; k<merge->nrecv; k++) {
4937     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4938     nrows       = *buf_ri_k[k];
4939     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4940     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4941   }
4942 
4943   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4944   len  = 0;
4945   for (i=0; i<m; i++) {
4946     bnzi = 0;
4947     /* add local non-zero cols of this proc's seqmat into lnk */
4948     arow  = owners[rank] + i;
4949     anzi  = ai[arow+1] - ai[arow];
4950     aj    = a->j + ai[arow];
4951     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4952     bnzi += nlnk;
4953     /* add received col data into lnk */
4954     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4955       if (i == *nextrow[k]) { /* i-th row */
4956         anzi  = *(nextai[k]+1) - *nextai[k];
4957         aj    = buf_rj[k] + *nextai[k];
4958         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4959         bnzi += nlnk;
4960         nextrow[k]++; nextai[k]++;
4961       }
4962     }
4963     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4964 
4965     /* if free space is not available, make more free space */
4966     if (current_space->local_remaining<bnzi) {
4967       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4968       nspacedouble++;
4969     }
4970     /* copy data into free space, then initialize lnk */
4971     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4972     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4973 
4974     current_space->array           += bnzi;
4975     current_space->local_used      += bnzi;
4976     current_space->local_remaining -= bnzi;
4977 
4978     bi[i+1] = bi[i] + bnzi;
4979   }
4980 
4981   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4982 
4983   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4984   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4985   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4986 
4987   /* create symbolic parallel matrix B_mpi */
4988   /*---------------------------------------*/
4989   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4990   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4991   if (n==PETSC_DECIDE) {
4992     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4993   } else {
4994     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4995   }
4996   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4997   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4998   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4999   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5000   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5001 
5002   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5003   B_mpi->assembled    = PETSC_FALSE;
5004   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5005   merge->bi           = bi;
5006   merge->bj           = bj;
5007   merge->buf_ri       = buf_ri;
5008   merge->buf_rj       = buf_rj;
5009   merge->coi          = NULL;
5010   merge->coj          = NULL;
5011   merge->owners_co    = NULL;
5012 
5013   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5014 
5015   /* attach the supporting struct to B_mpi for reuse */
5016   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5017   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5018   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5019   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5020   *mpimat = B_mpi;
5021 
5022   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5023   PetscFunctionReturn(0);
5024 }
5025 
5026 /*@C
5027       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5028                  matrices from each processor
5029 
5030     Collective
5031 
5032    Input Parameters:
5033 +    comm - the communicator the parallel matrix will live on
5034 .    seqmat - the input sequential matrix on each process
5035 .    m - number of local rows (or PETSC_DECIDE)
5036 .    n - number of local columns (or PETSC_DECIDE)
5037 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5038 
5039    Output Parameter:
5040 .    mpimat - the parallel matrix generated
5041 
5042     Level: advanced
5043 
5044    Notes:
5045      The dimensions of the sequential matrix on each process MUST be the same.
5046      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5047      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5048 @*/
5049 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5050 {
5051   PetscErrorCode ierr;
5052   PetscMPIInt    size;
5053 
5054   PetscFunctionBegin;
5055   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5056   if (size == 1) {
5057     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5058     if (scall == MAT_INITIAL_MATRIX) {
5059       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5060     } else {
5061       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5062     }
5063     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5064     PetscFunctionReturn(0);
5065   }
5066   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5067   if (scall == MAT_INITIAL_MATRIX) {
5068     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5069   }
5070   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5071   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5072   PetscFunctionReturn(0);
5073 }
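
/*
   Usage sketch (illustrative fragment, not part of the library source): every
   rank assembles its own M x N MATSEQAIJ contribution, and the contributions
   are summed entrywise into one parallel matrix; a later call with
   MAT_REUSE_MATRIX re-sums updated values into the same mpimat.  Since seqmat
   is captured by the container, it is destroyed together with mpimat.

     Mat seqmat,mpimat;
     ... each rank builds and assembles its M x N seqmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change numerical values (same nonzero pattern) in seqmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/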
5074 
5075 /*@
5076      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5077           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5078           with MatGetSize().
5079 
5080     Not Collective
5081 
5082    Input Parameters:
5083 +    A - the matrix
5084 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5085 
5086    Output Parameter:
5087 .    A_loc - the local sequential matrix generated
5088 
5089     Level: developer
5090 
5091 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5092 
5093 @*/
5094 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5095 {
5096   PetscErrorCode ierr;
5097   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5098   Mat_SeqAIJ     *mat,*a,*b;
5099   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5100   MatScalar      *aa,*ba,*cam;
5101   PetscScalar    *ca;
5102   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5103   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5104   PetscBool      match;
5105   MPI_Comm       comm;
5106   PetscMPIInt    size;
5107 
5108   PetscFunctionBegin;
5109   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5110   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5111   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5112   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5113   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5114 
5115   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5116   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5117   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5118   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5119   aa = a->a; ba = b->a;
5120   if (scall == MAT_INITIAL_MATRIX) {
5121     if (size == 1) {
5122       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5123       PetscFunctionReturn(0);
5124     }
5125 
5126     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5127     ci[0] = 0;
5128     for (i=0; i<am; i++) {
5129       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5130     }
5131     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5132     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5133     k    = 0;
5134     for (i=0; i<am; i++) {
5135       ncols_o = bi[i+1] - bi[i];
5136       ncols_d = ai[i+1] - ai[i];
5137       /* off-diagonal portion of A: columns left of the diagonal block */
5138       for (jo=0; jo<ncols_o; jo++) {
5139         col = cmap[*bj];
5140         if (col >= cstart) break;
5141         cj[k]   = col; bj++;
5142         ca[k++] = *ba++;
5143       }
5144       /* diagonal portion of A */
5145       for (j=0; j<ncols_d; j++) {
5146         cj[k]   = cstart + *aj++;
5147         ca[k++] = *aa++;
5148       }
5149       /* off-diagonal portion of A: columns right of the diagonal block */
5150       for (j=jo; j<ncols_o; j++) {
5151         cj[k]   = cmap[*bj++];
5152         ca[k++] = *ba++;
5153       }
5154     }
5155     /* put together the new matrix */
5156     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5157     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5158     /* Since these are PETSc arrays, change flags to free them as necessary. */
5159     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5160     mat->free_a  = PETSC_TRUE;
5161     mat->free_ij = PETSC_TRUE;
5162     mat->nonew   = 0;
5163   } else if (scall == MAT_REUSE_MATRIX) {
5164     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5165     ci = mat->i; cj = mat->j; cam = mat->a;
5166     for (i=0; i<am; i++) {
5167       /* off-diagonal portion of A: columns left of the diagonal block */
5168       ncols_o = bi[i+1] - bi[i];
5169       for (jo=0; jo<ncols_o; jo++) {
5170         col = cmap[*bj];
5171         if (col >= cstart) break;
5172         *cam++ = *ba++; bj++;
5173       }
5174       /* diagonal portion of A */
5175       ncols_d = ai[i+1] - ai[i];
5176       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5177       /* off-diagonal portion of A: columns right of the diagonal block */
5178       for (j=jo; j<ncols_o; j++) {
5179         *cam++ = *ba++; bj++;
5180       }
5181     }
5182   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5183   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5184   PetscFunctionReturn(0);
5185 }
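
/*
   Usage sketch (illustrative fragment): extract this rank's rows as one
   sequential matrix, then refresh the values after the parallel matrix is
   re-assembled with the same nonzero pattern.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc; later, after new values are assembled into A ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/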
5186 
5187 /*@C
5188      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and only its NON-ZERO columns
5189 
5190     Not Collective
5191 
5192    Input Parameters:
5193 +    A - the matrix
5194 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5195 -    row, col - index sets of rows and columns to extract (or NULL)
5196 
5197    Output Parameter:
5198 .    A_loc - the local sequential matrix generated
5199 
5200     Level: developer
5201 
5202 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5203 
5204 @*/
5205 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5206 {
5207   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5208   PetscErrorCode ierr;
5209   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5210   IS             isrowa,iscola;
5211   Mat            *aloc;
5212   PetscBool      match;
5213 
5214   PetscFunctionBegin;
5215   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5216   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5217   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5218   if (!row) {
5219     start = A->rmap->rstart; end = A->rmap->rend;
5220     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5221   } else {
5222     isrowa = *row;
5223   }
5224   if (!col) {
5225     start = A->cmap->rstart;
5226     cmap  = a->garray;
5227     nzA   = a->A->cmap->n;
5228     nzB   = a->B->cmap->n;
5229     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5230     ncols = 0;
5231     for (i=0; i<nzB; i++) {
5232       if (cmap[i] < start) idx[ncols++] = cmap[i];
5233       else break;
5234     }
5235     imark = i;
5236     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5237     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5238     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5239   } else {
5240     iscola = *col;
5241   }
5242   if (scall != MAT_INITIAL_MATRIX) {
5243     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5244     aloc[0] = *A_loc;
5245   }
5246   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5247   if (!col) { /* attach global id of condensed columns */
5248     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5249   }
5250   *A_loc = aloc[0];
5251   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5252   if (!row) {
5253     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5254   }
5255   if (!col) {
5256     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5257   }
5258   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5259   PetscFunctionReturn(0);
5260 }
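
/*
   Usage sketch (illustrative fragment): with NULL row and col index sets, all
   local rows and only the columns holding nonzeros are selected; the global
   ids of the condensed columns can be recovered from the attached IS.

     Mat A_loc;
     IS  iscol;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = PetscObjectQuery((PetscObject)A_loc,"_petsc_GetLocalMatCondensed_iscol",(PetscObject*)&iscol);CHKERRQ(ierr);
*/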
5261 
5262 /*@C
5263     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5264 
5265     Collective on Mat
5266 
5267    Input Parameters:
5268 +    A,B - the matrices in mpiaij format
5269 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5270 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5271 
5272    Output Parameters:
5273 +    rowb, colb - index sets of rows and columns of B to extract
5274 -    B_seq - the sequential matrix generated
5275 
5276     Level: developer
5277 
5278 @*/
5279 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5280 {
5281   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5282   PetscErrorCode ierr;
5283   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5284   IS             isrowb,iscolb;
5285   Mat            *bseq=NULL;
5286 
5287   PetscFunctionBegin;
5288   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5289     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5290   }
5291   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5292 
5293   if (scall == MAT_INITIAL_MATRIX) {
5294     start = A->cmap->rstart;
5295     cmap  = a->garray;
5296     nzA   = a->A->cmap->n;
5297     nzB   = a->B->cmap->n;
5298     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5299     ncols = 0;
5300     for (i=0; i<nzB; i++) {  /* global rows preceding the local block */
5301       if (cmap[i] < start) idx[ncols++] = cmap[i];
5302       else break;
5303     }
5304     imark = i;
5305     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5306     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows following the local block */
5307     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5308     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5309   } else {
5310     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5311     isrowb  = *rowb; iscolb = *colb;
5312     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5313     bseq[0] = *B_seq;
5314   }
5315   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5316   *B_seq = bseq[0];
5317   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5318   if (!rowb) {
5319     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5320   } else {
5321     *rowb = isrowb;
5322   }
5323   if (!colb) {
5324     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5325   } else {
5326     *colb = iscolb;
5327   }
5328   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5329   PetscFunctionReturn(0);
5330 }
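
/*
   Usage sketch (illustrative fragment): on the first call pass MAT_INITIAL_MATRIX
   and keep the returned index sets, since MAT_REUSE_MATRIX requires them again.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... after the values of B change with an unchanged nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
*/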
5331 
5332 /*
5333     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5334     of the OFF-DIAGONAL portion of local A
5335 
5336     Collective on Mat
5337 
5338    Input Parameters:
5339 +    A,B - the matrices in mpiaij format
5340 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5341 
5342    Output Parameters:
5343 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5344 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5345 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5346 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5347 
5348     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5349      for this matrix. This is not desirable.
5350 
5351     Level: developer
5352 
5353 */
5354 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5355 {
5356   PetscErrorCode         ierr;
5357   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5358   Mat_SeqAIJ             *b_oth;
5359   VecScatter             ctx;
5360   MPI_Comm               comm;
5361   const PetscMPIInt      *rprocs,*sprocs;
5362   const PetscInt         *srow,*rstarts,*sstarts;
5363   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5364   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5365   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5366   MPI_Request            *rwaits = NULL,*swaits = NULL;
5367   MPI_Status             rstatus;
5368   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5369 
5370   PetscFunctionBegin;
5371   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5372   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5373 
5374   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5375     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5376   }
5377   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5378   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5379 
5380   if (size == 1) {
5381     if (startsj_s) *startsj_s = NULL;
5382     if (bufa_ptr)  *bufa_ptr  = NULL;
5383     *B_oth    = NULL;
5384     PetscFunctionReturn(0);
5385   }
5386 
5387   ctx = a->Mvctx;
5388   tag = ((PetscObject)ctx)->tag;
5389 
5390   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5391   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5392   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5393   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5394   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5395   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5396   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5397 
5398   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
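  /*
     The exchange below runs in three rounds over the VecScatter's send/receive
     process lists: (1) an i-array round exchanging row lengths, (2) a j-array
     round exchanging column indices, and (3) an a-array round exchanging the
     numerical values.  With MAT_REUSE_MATRIX the first two rounds are skipped
     and only the values are re-sent.
  */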
5399   if (scall == MAT_INITIAL_MATRIX) {
5400     /* i-array */
5401     /*---------*/
5402     /*  post receives */
5403     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5404     for (i=0; i<nrecvs; i++) {
5405       rowlen = rvalues + rstarts[i]*rbs;
5406       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5407       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5408     }
5409 
5410     /* pack the outgoing message */
5411     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5412 
5413     sstartsj[0] = 0;
5414     rstartsj[0] = 0;
5415     len         = 0; /* total length of j or a array to be sent */
5416     if (nsends) {
5417       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5418       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5419     }
5420     for (i=0; i<nsends; i++) {
5421       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5422       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5423       for (j=0; j<nrows; j++) {
5424         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5425         for (l=0; l<sbs; l++) {
5426           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5427 
5428           rowlen[j*sbs+l] = ncols;
5429 
5430           len += ncols;
5431           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5432         }
5433         k++;
5434       }
5435       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5436 
5437       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5438     }
5439     /* recvs and sends of i-array are completed */
5440     i = nrecvs;
5441     while (i--) {
5442       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5443     }
5444     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5445     ierr = PetscFree(svalues);CHKERRQ(ierr);
5446 
5447     /* allocate buffers for sending j and a arrays */
5448     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5449     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5450 
5451     /* create i-array of B_oth */
5452     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5453 
5454     b_othi[0] = 0;
5455     len       = 0; /* total length of j or a array to be received */
5456     k         = 0;
5457     for (i=0; i<nrecvs; i++) {
5458       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5459       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5460       for (j=0; j<nrows; j++) {
5461         b_othi[k+1] = b_othi[k] + rowlen[j];
5462         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5463         k++;
5464       }
5465       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5466     }
5467     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5468 
5469     /* allocate space for j and a arrays of B_oth */
5470     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5471     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5472 
5473     /* j-array */
5474     /*---------*/
5475     /*  post receives of j-array */
5476     for (i=0; i<nrecvs; i++) {
5477       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5478       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5479     }
5480 
5481     /* pack the outgoing message j-array */
5482     if (nsends) k = sstarts[0];
5483     for (i=0; i<nsends; i++) {
5484       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5485       bufJ  = bufj+sstartsj[i];
5486       for (j=0; j<nrows; j++) {
5487         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5488         for (ll=0; ll<sbs; ll++) {
5489           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5490           for (l=0; l<ncols; l++) {
5491             *bufJ++ = cols[l];
5492           }
5493           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5494         }
5495       }
5496       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5497     }
5498 
5499     /* recvs and sends of j-array are completed */
5500     i = nrecvs;
5501     while (i--) {
5502       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5503     }
5504     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5505   } else if (scall == MAT_REUSE_MATRIX) {
5506     sstartsj = *startsj_s;
5507     rstartsj = *startsj_r;
5508     bufa     = *bufa_ptr;
5509     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5510     b_otha   = b_oth->a;
5511   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5512 
5513   /* a-array */
5514   /*---------*/
5515   /*  post receives of a-array */
5516   for (i=0; i<nrecvs; i++) {
5517     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5518     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5519   }
5520 
5521   /* pack the outgoing message a-array */
5522   if (nsends) k = sstarts[0];
5523   for (i=0; i<nsends; i++) {
5524     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5525     bufA  = bufa+sstartsj[i];
5526     for (j=0; j<nrows; j++) {
5527       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5528       for (ll=0; ll<sbs; ll++) {
5529         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5530         for (l=0; l<ncols; l++) {
5531           *bufA++ = vals[l];
5532         }
5533         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5534       }
5535     }
5536     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5537   }
5538   /* recvs and sends of a-array are completed */
5539   i = nrecvs;
5540   while (i--) {
5541     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5542   }
5543   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5544   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5545 
5546   if (scall == MAT_INITIAL_MATRIX) {
5547     /* put together the new matrix */
5548     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5549 
5550     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5551     /* Since these are PETSc arrays, change flags to free them as necessary. */
5552     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5553     b_oth->free_a  = PETSC_TRUE;
5554     b_oth->free_ij = PETSC_TRUE;
5555     b_oth->nonew   = 0;
5556 
5557     ierr = PetscFree(bufj);CHKERRQ(ierr);
5558     if (!startsj_s || !bufa_ptr) {
5559       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5560       ierr = PetscFree(bufa);CHKERRQ(ierr);
5561     } else {
5562       *startsj_s = sstartsj;
5563       *startsj_r = rstartsj;
5564       *bufa_ptr  = bufa;
5565     }
5566   }
5567 
5568   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5569   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5570   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5571   PetscFunctionReturn(0);
5572 }
5573 
5574 /*@C
5575   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5576 
5577   Not Collective
5578 
5579   Input Parameter:
5580 . A - The matrix in mpiaij format
5581 
5582   Output Parameters:
5583 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5584 . colmap - A map from global column index to local index into lvec
5585 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5586 
5587   Level: developer
5588 
5589 @*/
5590 #if defined(PETSC_USE_CTABLE)
5591 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5592 #else
5593 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5594 #endif
5595 {
5596   Mat_MPIAIJ *a;
5597 
5598   PetscFunctionBegin;
5599   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5600   PetscValidPointer(lvec, 2);
5601   PetscValidPointer(colmap, 3);
5602   PetscValidPointer(multScatter, 4);
5603   a = (Mat_MPIAIJ*) A->data;
5604   if (lvec) *lvec = a->lvec;
5605   if (colmap) *colmap = a->colmap;
5606   if (multScatter) *multScatter = a->Mvctx;
5607   PetscFunctionReturn(0);
5608 }
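
/*
   Usage sketch (illustrative fragment; shown for builds without PETSC_USE_CTABLE,
   where colmap is a plain PetscInt array): peek at the structures MatMult() uses.

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/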
5609 
5610 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5611 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5612 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5613 #if defined(PETSC_HAVE_MKL_SPARSE)
5614 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5615 #endif
5616 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5617 #if defined(PETSC_HAVE_ELEMENTAL)
5618 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5619 #endif
5620 #if defined(PETSC_HAVE_HYPRE)
5621 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5622 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5623 #endif
5624 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5625 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5626 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5627 
5628 /*
5629     Computes (B'*A')' since computing B*A directly is untenable
5630 
5631                n                       p                          p
5632         (              )       (              )         (                  )
5633       m (      A       )  *  n (       B      )   =   m (         C        )
5634         (              )       (              )         (                  )
5635 
5636 */
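/*
   A shape check of the identity above: with A of size m x n and B of size n x p,
   B' is p x n and A' is n x m, so B'*A' is p x m and (B'*A')' is the desired
   m x p product C = A*B.
*/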
5637 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5638 {
5639   PetscErrorCode ierr;
5640   Mat            At,Bt,Ct;
5641 
5642   PetscFunctionBegin;
5643   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5644   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5645   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5646   ierr = MatDestroy(&At);CHKERRQ(ierr);
5647   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5648   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5649   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5650   PetscFunctionReturn(0);
5651 }
5652 
5653 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5654 {
5655   PetscErrorCode ierr;
5656   PetscInt       m=A->rmap->n,n=B->cmap->n;
5657   Mat            Cmat;
5658 
5659   PetscFunctionBegin;
5660   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5661   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5662   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5663   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5664   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5665   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5666   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5667   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5668 
5669   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5670 
5671   *C = Cmat;
5672   PetscFunctionReturn(0);
5673 }
5674 
5675 /* ----------------------------------------------------------------*/
5676 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5677 {
5678   PetscErrorCode ierr;
5679 
5680   PetscFunctionBegin;
5681   if (scall == MAT_INITIAL_MATRIX) {
5682     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5683     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5684     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5685   }
5686   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5687   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5688   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5689   PetscFunctionReturn(0);
5690 }
5691 
5692 /*MC
5693    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5694 
5695    Options Database Keys:
5696 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5697 
5698   Level: beginner
5699 
5700 .seealso: MatCreateAIJ()
5701 M*/
5702 
5703 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5704 {
5705   Mat_MPIAIJ     *b;
5706   PetscErrorCode ierr;
5707   PetscMPIInt    size;
5708 
5709   PetscFunctionBegin;
5710   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5711 
5712   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5713   B->data       = (void*)b;
5714   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5715   B->assembled  = PETSC_FALSE;
5716   B->insertmode = NOT_SET_VALUES;
5717   b->size       = size;
5718 
5719   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5720 
5721   /* build cache for off array entries formed */
5722   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5723 
5724   b->donotstash  = PETSC_FALSE;
5725   b->colmap      = NULL;
5726   b->garray      = NULL;
5727   b->roworiented = PETSC_TRUE;
5728 
5729   /* stuff used for matrix vector multiply */
5730   b->lvec  = NULL;
5731   b->Mvctx = NULL;
5732 
5733   /* stuff for MatGetRow() */
5734   b->rowindices   = NULL;
5735   b->rowvalues    = NULL;
5736   b->getrowactive = PETSC_FALSE;
5737 
5738   /* flexible pointer used in CUSP/CUSPARSE classes */
5739   b->spptr = NULL;
5740 
5741   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5742   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5743   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5744   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5745   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5746   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5747   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5748   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5749   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5750   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5751 #if defined(PETSC_HAVE_MKL_SPARSE)
5752   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5753 #endif
5754   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5755   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5756 #if defined(PETSC_HAVE_ELEMENTAL)
5757   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5758 #endif
5759 #if defined(PETSC_HAVE_HYPRE)
5760   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5761 #endif
5762   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5763   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5764   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5765   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5766   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5767 #if defined(PETSC_HAVE_HYPRE)
5768   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5769 #endif
5770   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5771   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5772   PetscFunctionReturn(0);
5773 }
5774 
5775 /*@C
5776      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5777          and "off-diagonal" part of the matrix in CSR format.
5778 
5779    Collective
5780 
5781    Input Parameters:
5782 +  comm - MPI communicator
5783 .  m - number of local rows (Cannot be PETSC_DECIDE)
5784 .  n - This value should be the same as the local size used in creating the
5785        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5786        calculated if N is given). For square matrices n is almost always m.
5787 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5788 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5789 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5790 .   j - column indices
5791 .   a - matrix values
5792 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5793 .   oj - column indices
5794 -   oa - matrix values
5795 
5796    Output Parameter:
5797 .   mat - the matrix
5798 
5799    Level: advanced
5800 
5801    Notes:
5802        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5803        must free the arrays once the matrix has been destroyed and not before.
5804 
5805        The i and j indices are 0 based
5806 
5807        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5808 
5809        This sets local rows and cannot be used to set off-processor values.
5810 
5811        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5812        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5813        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5814        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5815        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5816        communication if it is known that only local entries will be set.
5817 
5818 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5819           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5820 @*/
5821 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5822 {
5823   PetscErrorCode ierr;
5824   Mat_MPIAIJ     *maij;
5825 
5826   PetscFunctionBegin;
5827   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5828   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5829   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5830   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5831   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5832   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5833   maij = (Mat_MPIAIJ*) (*mat)->data;
5834 
5835   (*mat)->preallocated = PETSC_TRUE;
5836 
5837   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5838   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5839 
5840   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5841   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5842 
5843   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5844   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5845   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5846   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5847 
5848   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5849   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5850   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5851   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5852   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5853   PetscFunctionReturn(0);
5854 }
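
/*
   Usage sketch (illustrative fragment; the values are made up): two ranks, a 4x4
   matrix with 2 rows and 2 columns owned per rank.  The j[] indices of the
   "diagonal" block are local to the owned column block (it is built with n local
   columns above), while oj[] holds global column indices.

     (rank 0)                                       (rank 1)
     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};     i[]  = {0,1,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.,2.};                    a[]  = {4.,5.};
     PetscInt    oi[] = {0,1,1},  oj[] = {2};       oi[] = {0,0,1},  oj[] = {1};
     PetscScalar oa[] = {3.};                       oa[] = {6.};
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/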
5855 
5856 /*
5857     Special version for direct calls from Fortran
5858 */
5859 #include <petsc/private/fortranimpl.h>
5860 
5861 /* Change these macros so they can be used in a void function */
5862 #undef CHKERRQ
5863 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5864 #undef SETERRQ2
5865 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5866 #undef SETERRQ3
5867 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5868 #undef SETERRQ
5869 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5870 
5871 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5872 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5873 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5874 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5875 #else
5876 #endif
5877 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5878 {
5879   Mat            mat  = *mmat;
5880   PetscInt       m    = *mm, n = *mn;
5881   InsertMode     addv = *maddv;
5882   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5883   PetscScalar    value;
5884   PetscErrorCode ierr;
5885 
5886   MatCheckPreallocated(mat,1);
5887   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5888 
5889 #if defined(PETSC_USE_DEBUG)
5890   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5891 #endif
5892   {
5893     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5894     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5895     PetscBool roworiented = aij->roworiented;
5896 
5897     /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
5898     Mat        A                 = aij->A;
5899     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5900     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5901     MatScalar  *aa               = a->a;
5902     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5903     Mat        B                 = aij->B;
5904     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5905     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5906     MatScalar  *ba               = b->a;
5907 
5908     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5909     PetscInt  nonew = a->nonew;
5910     MatScalar *ap1,*ap2;
5911 
5912     PetscFunctionBegin;
5913     for (i=0; i<m; i++) {
5914       if (im[i] < 0) continue;
5915 #if defined(PETSC_USE_DEBUG)
5916       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5917 #endif
5918       if (im[i] >= rstart && im[i] < rend) {
5919         row      = im[i] - rstart;
5920         lastcol1 = -1;
5921         rp1      = aj + ai[row];
5922         ap1      = aa + ai[row];
5923         rmax1    = aimax[row];
5924         nrow1    = ailen[row];
5925         low1     = 0;
5926         high1    = nrow1;
5927         lastcol2 = -1;
5928         rp2      = bj + bi[row];
5929         ap2      = ba + bi[row];
5930         rmax2    = bimax[row];
5931         nrow2    = bilen[row];
5932         low2     = 0;
5933         high2    = nrow2;
5934 
5935         for (j=0; j<n; j++) {
5936           if (roworiented) value = v[i*n+j];
5937           else value = v[i+j*m];
5938           if (in[j] >= cstart && in[j] < cend) {
5939             col = in[j] - cstart;
5940             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5941             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5942           } else if (in[j] < 0) continue;
5943 #if defined(PETSC_USE_DEBUG)
5944           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5945           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5946 #endif
5947           else {
5948             if (mat->was_assembled) {
5949               if (!aij->colmap) {
5950                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5951               }
5952 #if defined(PETSC_USE_CTABLE)
5953               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5954               col--;
5955 #else
5956               col = aij->colmap[in[j]] - 1;
5957 #endif
5958               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5959               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5960                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5961                 col  =  in[j];
5962                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5963                 B     = aij->B;
5964                 b     = (Mat_SeqAIJ*)B->data;
5965                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5966                 rp2   = bj + bi[row];
5967                 ap2   = ba + bi[row];
5968                 rmax2 = bimax[row];
5969                 nrow2 = bilen[row];
5970                 low2  = 0;
5971                 high2 = nrow2;
5972                 bm    = aij->B->rmap->n;
5973                 ba    = b->a;
5974               }
5975             } else col = in[j];
5976             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5977           }
5978         }
5979       } else if (!aij->donotstash) {
5980         if (roworiented) {
5981           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5982         } else {
5983           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5984         }
5985       }
5986     }
5987   }
5988   PetscFunctionReturnVoid();
5989 }
5990