xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision af4fa82cc29c77689f3cd2af837601dbdc3602c2)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23     enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
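/*
   A minimal usage sketch of the pattern recommended above (error checking elided; comm,
   M, N, and the per-row nonzero estimates are illustrative assumptions, not fixed values):

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);          -- used when comm has a single process
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   -- used when comm has multiple processes
*/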
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb,*aav,*bav;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92 
93   ia   = a->i;
94   ib   = b->i;
95   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
96   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) {
101       cnt++;
102       goto ok1;
103     }
104     aa = aav + ia[i];
105     for (j=0; j<na; j++) {
106       if (aa[j] != 0.0) goto ok1;
107     }
108     bb = bav + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) goto ok1;
111     }
112     cnt++;
113 ok1:;
114   }
115   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
116   if (!n0rows) {
117     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
118     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
119     PetscFunctionReturn(0);
120   }
121   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
122   cnt  = 0;
123   for (i=0; i<m; i++) {
124     na = ia[i+1] - ia[i];
125     nb = ib[i+1] - ib[i];
126     if (!na && !nb) continue;
127     aa = aav + ia[i];
128     for (j=0; j<na; j++) {
129       if (aa[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134     bb = bav + ib[i];
135     for (j=0; j<nb; j++) {
136       if (bb[j] != 0.0) {
137         rows[cnt++] = rstart + i;
138         goto ok2;
139       }
140     }
141 ok2:;
142   }
143   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
144   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
145   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
150 {
151   PetscErrorCode    ierr;
152   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
153   PetscBool         cong;
154 
155   PetscFunctionBegin;
156   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
157   if (Y->assembled && cong) {
158     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
159   } else {
160     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
161   }
162   PetscFunctionReturn(0);
163 }
164 
165 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
166 {
167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
168   PetscErrorCode ierr;
169   PetscInt       i,rstart,nrows,*rows;
170 
171   PetscFunctionBegin;
172   *zrows = NULL;
173   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
174   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
175   for (i=0; i<nrows; i++) rows[i] += rstart;
176   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
181 {
182   PetscErrorCode    ierr;
183   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
184   PetscInt          i,n,*garray = aij->garray;
185   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
186   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
187   PetscReal         *work;
188   const PetscScalar *dummy;
189 
190   PetscFunctionBegin;
191   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
192   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
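  /* the get/restore pairs below do no computation; they only ensure the matrix values
     are up to date on the host when they currently live on a device */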
193   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
194   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
195   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
196   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
197   if (type == NORM_2) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
203     }
204   } else if (type == NORM_1) {
205     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
206       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
207     }
208     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
209       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
210     }
211   } else if (type == NORM_INFINITY) {
212     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
213       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
214     }
215     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
216       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
217     }
218 
219   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
220   if (type == NORM_INFINITY) {
221     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
222   } else {
223     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
224   }
225   ierr = PetscFree(work);CHKERRQ(ierr);
226   if (type == NORM_2) {
227     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
233 {
234   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
235   IS              sis,gis;
236   PetscErrorCode  ierr;
237   const PetscInt  *isis,*igis;
238   PetscInt        n,*iis,nsis,ngis,rstart,i;
239 
240   PetscFunctionBegin;
241   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
242   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
243   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
244   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
245   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
246   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
247 
248   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
249   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
250   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
251   n    = ngis + nsis;
252   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
253   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
254   for (i=0; i<n; i++) iis[i] += rstart;
255   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
256 
257   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
258   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
259   ierr = ISDestroy(&sis);CHKERRQ(ierr);
260   ierr = ISDestroy(&gis);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /*
265   Local utility routine that creates a mapping from the global column
266   number to the local number in the off-diagonal part of the local
267   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
268   a slightly higher hash-table cost; without it, it is not scalable (each process
269   has an order-N integer array) but is fast to access.
270 */
271 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
272 {
273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
274   PetscErrorCode ierr;
275   PetscInt       n = aij->B->cmap->n,i;
276 
277   PetscFunctionBegin;
278   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
279 #if defined(PETSC_USE_CTABLE)
280   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
281   for (i=0; i<n; i++) {
282     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
283   }
284 #else
285   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
286   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
287   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
288 #endif
289   PetscFunctionReturn(0);
290 }
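/*
   A sketch of the matching lookup (the same +1/-1 shift that MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() use below, so that a result of 0 can mean "not found"); gcol is
   a hypothetical global column index and error checking is elided:

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;  -- lcol < 0: column not in garray
   #else
     lcol = aij->colmap[gcol] - 1;                      -- lcol < 0: column not in garray
   #endif
*/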
291 
292 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
293 { \
294     if (col <= lastcol1)  low1 = 0;     \
295     else                 high1 = nrow1; \
296     lastcol1 = col;\
297     while (high1-low1 > 5) { \
298       t = (low1+high1)/2; \
299       if (rp1[t] > col) high1 = t; \
300       else              low1  = t; \
301     } \
302       for (_i=low1; _i<high1; _i++) { \
303         if (rp1[_i] > col) break; \
304         if (rp1[_i] == col) { \
305           if (addv == ADD_VALUES) { \
306             ap1[_i] += value;   \
307             /* Not sure whether PetscLogFlops will slow down the code or not */ \
308             (void)PetscLogFlops(1.0);   \
309            } \
310           else                    ap1[_i] = value; \
311           inserted = PETSC_TRUE; \
312           goto a_noinsert; \
313         } \
314       }  \
315       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
316       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
317       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
318       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
319       N = nrow1++ - 1; a->nz++; high1++; \
320       /* shift up all the later entries in this row */ \
321       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
322       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
323       rp1[_i] = col;  \
324       ap1[_i] = value;  \
325       A->nonzerostate++;\
326       a_noinsert: ; \
327       ailen[row] = nrow1; \
328 }
329 
330 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
331   { \
332     if (col <= lastcol2) low2 = 0;                        \
333     else high2 = nrow2;                                   \
334     lastcol2 = col;                                       \
335     while (high2-low2 > 5) {                              \
336       t = (low2+high2)/2;                                 \
337       if (rp2[t] > col) high2 = t;                        \
338       else             low2  = t;                         \
339     }                                                     \
340     for (_i=low2; _i<high2; _i++) {                       \
341       if (rp2[_i] > col) break;                           \
342       if (rp2[_i] == col) {                               \
343         if (addv == ADD_VALUES) {                         \
344           ap2[_i] += value;                               \
345           (void)PetscLogFlops(1.0);                       \
346         }                                                 \
347         else                    ap2[_i] = value;          \
348         inserted = PETSC_TRUE;                            \
349         goto b_noinsert;                                  \
350       }                                                   \
351     }                                                     \
352     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
353     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
354     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
355     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
356     N = nrow2++ - 1; b->nz++; high2++;                    \
357     /* shift up all the later entries in this row */      \
358     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
359     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
360     rp2[_i] = col;                                        \
361     ap2[_i] = value;                                      \
362     B->nonzerostate++;                                    \
363     b_noinsert: ;                                         \
364     bilen[row] = nrow2;                                   \
365   }
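/*
   Both macros above locate the target column with a short binary search (narrowing the
   window until at most ~5 candidates remain) followed by a linear scan, then either
   update the existing entry in place or shift the tail of the row up to insert a new one.
*/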
366 
367 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
368 {
369   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
370   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
371   PetscErrorCode ierr;
372   PetscInt       l,*garray = mat->garray,diag;
373 
374   PetscFunctionBegin;
375   /* code only works for square matrices A */
376 
377   /* find size of row to the left of the diagonal part */
378   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
379   row  = row - diag;
380   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
381     if (garray[b->j[b->i[row]+l]] > diag) break;
382   }
383   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
384 
385   /* diagonal part */
386   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
387 
388   /* right of diagonal part */
389   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
390 #if defined(PETSC_HAVE_DEVICE)
391   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
392 #endif
393   PetscFunctionReturn(0);
394 }
395 
396 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
397 {
398   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
399   PetscScalar    value = 0.0;
400   PetscErrorCode ierr;
401   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
402   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
403   PetscBool      roworiented = aij->roworiented;
404 
405   /* Some Variables required in the macro */
406   Mat        A                    = aij->A;
407   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
408   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
409   PetscBool  ignorezeroentries    = a->ignorezeroentries;
410   Mat        B                    = aij->B;
411   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
412   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
413   MatScalar  *aa,*ba;
414   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
415    * cannot use "#if defined" inside a macro. */
416   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
417 
418   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
419   PetscInt  nonew;
420   MatScalar *ap1,*ap2;
421 
422   PetscFunctionBegin;
423 #if defined(PETSC_HAVE_DEVICE)
424   if (A->offloadmask == PETSC_OFFLOAD_GPU) {
425     const PetscScalar *dummy;
426     ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
427     ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
428   }
429   if (B->offloadmask == PETSC_OFFLOAD_GPU) {
430     const PetscScalar *dummy;
431     ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
432     ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
433   }
434 #endif
435   aa = a->a;
436   ba = b->a;
437   for (i=0; i<m; i++) {
438     if (im[i] < 0) continue;
439     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
440     if (im[i] >= rstart && im[i] < rend) {
441       row      = im[i] - rstart;
442       lastcol1 = -1;
443       rp1      = aj + ai[row];
444       ap1      = aa + ai[row];
445       rmax1    = aimax[row];
446       nrow1    = ailen[row];
447       low1     = 0;
448       high1    = nrow1;
449       lastcol2 = -1;
450       rp2      = bj + bi[row];
451       ap2      = ba + bi[row];
452       rmax2    = bimax[row];
453       nrow2    = bilen[row];
454       low2     = 0;
455       high2    = nrow2;
456 
457       for (j=0; j<n; j++) {
458         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
459         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
460         if (in[j] >= cstart && in[j] < cend) {
461           col   = in[j] - cstart;
462           nonew = a->nonew;
463           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
464 #if defined(PETSC_HAVE_DEVICE)
465           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
466 #endif
467         } else if (in[j] < 0) continue;
468         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
469         else {
470           if (mat->was_assembled) {
471             if (!aij->colmap) {
472               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
473             }
474 #if defined(PETSC_USE_CTABLE)
475             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
476             col--;
477 #else
478             col = aij->colmap[in[j]] - 1;
479 #endif
480             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
481               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
482               col  =  in[j];
483               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
484               B        = aij->B;
485               b        = (Mat_SeqAIJ*)B->data;
486               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
487               rp2      = bj + bi[row];
488               ap2      = ba + bi[row];
489               rmax2    = bimax[row];
490               nrow2    = bilen[row];
491               low2     = 0;
492               high2    = nrow2;
493               bm       = aij->B->rmap->n;
494               ba       = b->a;
495               inserted = PETSC_FALSE;
496             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
497               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
498                 ierr = PetscInfo3(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
499               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
500             }
501           } else col = in[j];
502           nonew = b->nonew;
503           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
504 #if defined(PETSC_HAVE_DEVICE)
505           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
506 #endif
507         }
508       }
509     } else {
510       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
515         } else {
516           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
517         }
518       }
519     }
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
526     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
527     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
528 */
529 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
530 {
531   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
532   Mat            A           = aij->A; /* diagonal part of the matrix */
533   Mat            B           = aij->B; /* offdiagonal part of the matrix */
534   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
535   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
536   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
537   PetscInt       *ailen      = a->ilen,*aj = a->j;
538   PetscInt       *bilen      = b->ilen,*bj = b->j;
539   PetscInt       am          = aij->A->rmap->n,j;
540   PetscInt       diag_so_far = 0,dnz;
541   PetscInt       offd_so_far = 0,onz;
542 
543   PetscFunctionBegin;
544   /* Iterate over all rows of the matrix */
545   for (j=0; j<am; j++) {
546     dnz = onz = 0;
547     /*  Iterate over all non-zero columns of the current row */
548     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
549       /* If column is in the diagonal */
550       if (mat_j[col] >= cstart && mat_j[col] < cend) {
551         aj[diag_so_far++] = mat_j[col] - cstart;
552         dnz++;
553       } else { /* off-diagonal entries */
554         bj[offd_so_far++] = mat_j[col];
555         onz++;
556       }
557     }
558     ailen[j] = dnz;
559     bilen[j] = onz;
560   }
561   PetscFunctionReturn(0);
562 }
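/*
   A worked example (sketch) of the split performed above: with owned columns
   [cstart,cend) = [4,8), the CSR row { 2, 5, 7, 9 } becomes { 5-4, 7-4 } = { 1, 3 }
   in the diagonal block (local column indices) and { 2, 9 } in the off-diagonal
   block (still global indices at this point).
*/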
563 
564 /*
565     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
566     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
567     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
568     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
569     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
570 */
571 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
572 {
573   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
574   Mat            A      = aij->A; /* diagonal part of the matrix */
575   Mat            B      = aij->B; /* offdiagonal part of the matrix */
576   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
577   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
578   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
579   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
580   PetscInt       *ailen = a->ilen,*aj = a->j;
581   PetscInt       *bilen = b->ilen,*bj = b->j;
582   PetscInt       am     = aij->A->rmap->n,j;
583   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
584   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
585   PetscScalar    *aa = a->a,*ba = b->a;
586 
587   PetscFunctionBegin;
588   /* Iterate over all rows of the matrix */
589   for (j=0; j<am; j++) {
590     dnz_row = onz_row = 0;
591     rowstart_offd = full_offd_i[j];
592     rowstart_diag = full_diag_i[j];
593     /*  Iterate over all non-zero columns of the current row */
594     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
595       /* If column is in the diagonal */
596       if (mat_j[col] >= cstart && mat_j[col] < cend) {
597         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
598         aa[rowstart_diag+dnz_row] = mat_a[col];
599         dnz_row++;
600       } else { /* off-diagonal entries */
601         bj[rowstart_offd+onz_row] = mat_j[col];
602         ba[rowstart_offd+onz_row] = mat_a[col];
603         onz_row++;
604       }
605     }
606     ailen[j] = dnz_row;
607     bilen[j] = onz_row;
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
613 {
614   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
615   PetscErrorCode ierr;
616   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
617   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
618 
619   PetscFunctionBegin;
620   for (i=0; i<m; i++) {
621     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
622     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
623     if (idxm[i] >= rstart && idxm[i] < rend) {
624       row = idxm[i] - rstart;
625       for (j=0; j<n; j++) {
626         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
627         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
628         if (idxn[j] >= cstart && idxn[j] < cend) {
629           col  = idxn[j] - cstart;
630           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
631         } else {
632           if (!aij->colmap) {
633             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
634           }
635 #if defined(PETSC_USE_CTABLE)
636           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
637           col--;
638 #else
639           col = aij->colmap[idxn[j]] - 1;
640 #endif
641           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
642           else {
643             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
644           }
645         }
646       }
647     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
648   }
649   PetscFunctionReturn(0);
650 }
651 
652 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
653 {
654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
655   PetscErrorCode ierr;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
662   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
663   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
668 {
669   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
670   PetscErrorCode ierr;
671   PetscMPIInt    n;
672   PetscInt       i,j,rstart,ncols,flg;
673   PetscInt       *row,*col;
674   PetscBool      other_disassembled;
675   PetscScalar    *val;
676 
677   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
678 
679   PetscFunctionBegin;
680   if (!aij->donotstash && !mat->nooffprocentries) {
681     while (1) {
682       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
683       if (!flg) break;
684 
685       for (i=0; i<n;) {
686         /* Now identify the consecutive vals belonging to the same row */
687         for (j=i,rstart=row[j]; j<n; j++) {
688           if (row[j] != rstart) break;
689         }
690         if (j < n) ncols = j-i;
691         else       ncols = n-i;
692         /* Now assemble all these values with a single function call */
693         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
694         i    = j;
695       }
696     }
697     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
698   }
699 #if defined(PETSC_HAVE_DEVICE)
700   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
701   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
702   if (mat->boundtocpu) {
703     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
704     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
705   }
706 #endif
707   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
708   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
709 
710   /* determine if any processor has disassembled; if so, we must
711      also disassemble ourselves, in order that we may reassemble */
712   /*
713      if the nonzero structure of the submatrix B cannot change, then we know that
714      no processor disassembled, and thus we can skip this step
715   */
716   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
717     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
718     if (mat->was_assembled && !other_disassembled) {
719 #if defined(PETSC_HAVE_DEVICE)
720       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
721 #endif
722       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
723     }
724   }
725   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
726     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
727   }
728   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
729 #if defined(PETSC_HAVE_DEVICE)
730   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
731 #endif
732   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
733   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
734 
735   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
736 
737   aij->rowvalues = NULL;
738 
739   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
740 
741   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
742   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
743     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
744     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745   }
746 #if defined(PETSC_HAVE_DEVICE)
747   mat->offloadmask = PETSC_OFFLOAD_BOTH;
748 #endif
749   PetscFunctionReturn(0);
750 }
751 
752 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
753 {
754   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
759   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
764 {
765   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
766   PetscObjectState sA, sB;
767   PetscInt        *lrows;
768   PetscInt         r, len;
769   PetscBool        cong, lch, gch;
770   PetscErrorCode   ierr;
771 
772   PetscFunctionBegin;
773   /* get locally owned rows */
774   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
775   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
776   /* fix right hand side if needed */
777   if (x && b) {
778     const PetscScalar *xx;
779     PetscScalar       *bb;
780 
781     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788 
789   sA = mat->A->nonzerostate;
790   sB = mat->B->nonzerostate;
791 
792   if (diag != 0.0 && cong) {
793     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
794     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow new insertions */
796     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
797     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
798     PetscInt   nnwA, nnwB;
799     PetscBool  nnzA, nnzB;
800 
801     nnwA = aijA->nonew;
802     nnwB = aijB->nonew;
803     nnzA = aijA->keepnonzeropattern;
804     nnzB = aijB->keepnonzeropattern;
805     if (!nnzA) {
806       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
807       aijA->nonew = 0;
808     }
809     if (!nnzB) {
810       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
811       aijB->nonew = 0;
812     }
813     /* Must zero here before the next loop */
814     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
815     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     for (r = 0; r < len; ++r) {
817       const PetscInt row = lrows[r] + A->rmap->rstart;
818       if (row >= A->cmap->N) continue;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     aijA->nonew = nnwA;
822     aijB->nonew = nnwB;
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
826   }
827   ierr = PetscFree(lrows);CHKERRQ(ierr);
828   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
829   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830 
831   /* reduce nonzerostate */
832   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
833   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
834   if (gch) A->nonzerostate++;
835   PetscFunctionReturn(0);
836 }
837 
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscMPIInt       p = 0;
846   PetscSFNode       *rrows;
847   PetscSF           sf;
848   const PetscScalar *xx;
849   PetscScalar       *bb,*mask;
850   Vec               xmask,lmask;
851   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
852   const PetscInt    *aj, *ii,*ridx;
853   PetscScalar       *aa;
854 
855   PetscFunctionBegin;
856   /* Create SF where leaves are input rows and roots are owned rows */
857   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
858   for (r = 0; r < n; ++r) lrows[r] = -1;
859   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
860   for (r = 0; r < N; ++r) {
861     const PetscInt idx   = rows[r];
862     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
863     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
864       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
865     }
866     rrows[r].rank  = p;
867     rrows[r].index = rows[r] - owners[p];
868   }
869   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
870   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
871   /* Collect flags for rows to be zeroed */
872   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
874   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
875   /* Compress and put in row numbers */
876   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
877   /* zero diagonal part of matrix */
878   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
879   /* handle off diagonal part of matrix */
880   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
881   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
882   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
883   for (i=0; i<len; i++) bb[lrows[i]] = 1;
884   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
885   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
888   if (x && b) { /* this code is buggy when the row and column layout don't match */
889     PetscBool cong;
890 
891     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
892     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
893     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
896     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
897   }
898   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
899   /* remove zeroed rows of off diagonal matrix */
900   ii = aij->i;
901   for (i=0; i<len; i++) {
902     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
903   }
904   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
905   if (aij->compressedrow.use) {
906     m    = aij->compressedrow.nrows;
907     ii   = aij->compressedrow.i;
908     ridx = aij->compressedrow.rindex;
909     for (i=0; i<m; i++) {
910       n  = ii[i+1] - ii[i];
911       aj = aij->j + ii[i];
912       aa = aij->a + ii[i];
913 
914       for (j=0; j<n; j++) {
915         if (PetscAbsScalar(mask[*aj])) {
916           if (b) bb[*ridx] -= *aa*xx[*aj];
917           *aa = 0.0;
918         }
919         aa++;
920         aj++;
921       }
922       ridx++;
923     }
924   } else { /* do not use compressed row format */
925     m = l->B->rmap->n;
926     for (i=0; i<m; i++) {
927       n  = ii[i+1] - ii[i];
928       aj = aij->j + ii[i];
929       aa = aij->a + ii[i];
930       for (j=0; j<n; j++) {
931         if (PetscAbsScalar(mask[*aj])) {
932           if (b) bb[i] -= *aa*xx[*aj];
933           *aa = 0.0;
934         }
935         aa++;
936         aj++;
937       }
938     }
939   }
940   if (x && b) {
941     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
942     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
943   }
944   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
945   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
946   ierr = PetscFree(lrows);CHKERRQ(ierr);
947 
948   /* only change matrix nonzero state if pattern was allowed to be changed */
949   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
950     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
951     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
952   }
953   PetscFunctionReturn(0);
954 }
955 
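/*
   MatMult_MPIAIJ computes the product in split form, y = A_d x_owned + B x_ghost: the
   scatter that gathers the ghost values of x is started first, the purely local product
   with the diagonal block A_d runs while those messages are in flight, and the
   off-diagonal contribution is added once the scatter completes.
*/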
956 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
957 {
958   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
959   PetscErrorCode ierr;
960   PetscInt       nt;
961   VecScatter     Mvctx = a->Mvctx;
962 
963   PetscFunctionBegin;
964   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
965   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
966   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
968   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
974 {
975   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987   VecScatter     Mvctx = a->Mvctx;
988 
989   PetscFunctionBegin;
990   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
992   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
998 {
999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1000   PetscErrorCode ierr;
1001 
1002   PetscFunctionBegin;
1003   /* do nondiagonal part */
1004   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1005   /* do local part */
1006   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1007   /* add partial results together */
1008   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1009   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
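/*
   The transpose product above reverses the traffic pattern of MatMult_MPIAIJ: both local
   pieces are computed first (B^T x into the ghost work vector, A^T x into y), and the
   ghost contributions are then scattered back to their owners and accumulated into y
   with ADD_VALUES in SCATTER_REVERSE.
*/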
1012 
1013 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1014 {
1015   MPI_Comm       comm;
1016   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1017   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1018   IS             Me,Notme;
1019   PetscErrorCode ierr;
1020   PetscInt       M,N,first,last,*notme,i;
1021   PetscBool      lf;
1022   PetscMPIInt    size;
1023 
1024   PetscFunctionBegin;
1025   /* Easy test: symmetric diagonal block */
1026   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1027   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1028   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1029   if (!*f) PetscFunctionReturn(0);
1030   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1031   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1032   if (size == 1) PetscFunctionReturn(0);
1033 
1034   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1035   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1036   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1037   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1038   for (i=0; i<first; i++) notme[i] = i;
1039   for (i=last; i<M; i++) notme[i-last+first] = i;
1040   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1041   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1042   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1043   Aoff = Aoffs[0];
1044   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1045   Boff = Boffs[0];
1046   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1047   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1048   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1049   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1050   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1051   ierr = PetscFree(notme);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1056 {
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1065 {
1066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1067   PetscErrorCode ierr;
1068 
1069   PetscFunctionBegin;
1070   /* do nondiagonal part */
1071   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1072   /* do local part */
1073   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1074   /* add partial results together */
1075   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 /*
1081   This only works correctly for square matrices where the subblock A->A is the
1082    diagonal block
1083 */
1084 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1085 {
1086   PetscErrorCode ierr;
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088 
1089   PetscFunctionBegin;
1090   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1091   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1092   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1103   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1108 {
1109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113 #if defined(PETSC_USE_LOG)
1114   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1115 #endif
1116   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1117   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1118   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1119   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1120 #if defined(PETSC_USE_CTABLE)
1121   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1122 #else
1123   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1124 #endif
1125   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1126   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1127   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1128   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1129   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1130   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1131 
1132   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1133   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1134 
1135   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1145 #if defined(PETSC_HAVE_CUDA)
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1147 #endif
1148 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1150 #endif
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1152 #if defined(PETSC_HAVE_ELEMENTAL)
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1154 #endif
1155 #if defined(PETSC_HAVE_SCALAPACK)
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1157 #endif
1158 #if defined(PETSC_HAVE_HYPRE)
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1161 #endif
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1168 #if defined(PETSC_HAVE_MKL_SPARSE)
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1170 #endif
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1178 {
1179   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1180   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1181   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1182   const PetscInt    *garray = aij->garray;
1183   const PetscScalar *aa,*ba;
1184   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1185   PetscInt          *rowlens;
1186   PetscInt          *colidxs;
1187   PetscScalar       *matvals;
1188   PetscErrorCode    ierr;
1189 
1190   PetscFunctionBegin;
1191   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1192 
1193   M  = mat->rmap->N;
1194   N  = mat->cmap->N;
1195   m  = mat->rmap->n;
1196   rs = mat->rmap->rstart;
1197   cs = mat->cmap->rstart;
1198   nz = A->nz + B->nz;
1199 
1200   /* write matrix header */
1201   header[0] = MAT_FILE_CLASSID;
1202   header[1] = M; header[2] = N; header[3] = nz;
1203   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1204   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1205 
1206   /* fill in and store row lengths */
1207   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1208   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1209   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1210   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1211 
1212   /* fill in and store column indices */
1213   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
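  /* for each row, write the off-diagonal entries whose global column precedes the
     diagonal block first, then the diagonal-block entries (shifted to global numbering),
     then the remaining off-diagonal entries, so each row's columns come out in
     ascending global order */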
1214   for (cnt=0, i=0; i<m; i++) {
1215     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1216       if (garray[B->j[jb]] > cs) break;
1217       colidxs[cnt++] = garray[B->j[jb]];
1218     }
1219     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1220       colidxs[cnt++] = A->j[ja] + cs;
1221     for (; jb<B->i[i+1]; jb++)
1222       colidxs[cnt++] = garray[B->j[jb]];
1223   }
1224   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1225   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1226   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1227 
1228   /* fill in and store nonzero values */
1229   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1230   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1231   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1232   for (cnt=0, i=0; i<m; i++) {
1233     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1234       if (garray[B->j[jb]] > cs) break;
1235       matvals[cnt++] = ba[jb];
1236     }
1237     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1238       matvals[cnt++] = aa[ja];
1239     for (; jb<B->i[i+1]; jb++)
1240       matvals[cnt++] = ba[jb];
1241   }
1242   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1243   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1244   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1245   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1246   ierr = PetscFree(matvals);CHKERRQ(ierr);
1247 
1248   /* write block size option to the viewer's .info file */
1249   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #include <petscdraw.h>
1254 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1255 {
1256   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1257   PetscErrorCode    ierr;
1258   PetscMPIInt       rank = aij->rank,size = aij->size;
1259   PetscBool         isdraw,iascii,isbinary;
1260   PetscViewer       sviewer;
1261   PetscViewerFormat format;
1262 
1263   PetscFunctionBegin;
1264   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1265   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1266   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1267   if (iascii) {
1268     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1269     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1270       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1271       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1272       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1273       for (i=0; i<(PetscInt)size; i++) {
1274         nmax = PetscMax(nmax,nz[i]);
1275         nmin = PetscMin(nmin,nz[i]);
1276         navg += nz[i];
1277       }
1278       ierr = PetscFree(nz);CHKERRQ(ierr);
1279       navg = navg/size;
1280       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1281       PetscFunctionReturn(0);
1282     }
1284     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1285       MatInfo   info;
1286       PetscInt *inodes=NULL;
1287 
1288       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1289       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1290       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1291       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1292       if (!inodes) {
1293         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1294                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1295       } else {
1296         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1297                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1298       }
1299       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1300       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1301       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1302       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1303       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1304       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1306       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1307       PetscFunctionReturn(0);
1308     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1309       PetscInt inodecount,inodelimit,*inodes;
1310       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1311       if (inodes) {
1312         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1313       } else {
1314         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1315       }
1316       PetscFunctionReturn(0);
1317     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1318       PetscFunctionReturn(0);
1319     }
1320   } else if (isbinary) {
1321     if (size == 1) {
1322       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1323       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1324     } else {
1325       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1326     }
1327     PetscFunctionReturn(0);
1332   } else if (isdraw) {
1333     PetscDraw draw;
1334     PetscBool isnull;
1335     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1336     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1337     if (isnull) PetscFunctionReturn(0);
1338   }
1339 
1340   { /* assemble the entire matrix onto first processor */
1341     Mat A = NULL, Av;
1342     IS  isrow,iscol;
1343 
1344     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1345     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1346     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1347     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1348 /*  The commented code uses MatCreateSubMatrices instead */
1349 /*
1350     Mat *AA, A = NULL, Av;
1351     IS  isrow,iscol;
1352 
1353     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1354     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1355     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1356     if (!rank) {
1357        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1358        A    = AA[0];
1359        Av   = AA[0];
1360     }
1361     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1362 */
1363     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1364     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1365     /*
1366        Everyone has to call to draw the matrix since the graphics waits are
1367        synchronized across all processors that share the PetscDraw object
1368     */
1369     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1370     if (!rank) {
1371       if (((PetscObject)mat)->name) {
1372         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1373       }
1374       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1375     }
1376     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1377     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1378     ierr = MatDestroy(&A);CHKERRQ(ierr);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1384 {
1385   PetscErrorCode ierr;
1386   PetscBool      iascii,isdraw,issocket,isbinary;
1387 
1388   PetscFunctionBegin;
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1393   if (iascii || isdraw || isbinary || issocket) {
1394     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1400 {
1401   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1402   PetscErrorCode ierr;
1403   Vec            bb1 = NULL;
1404   PetscBool      hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1409     PetscFunctionReturn(0);
1410   }
1411 
1412   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1413     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1414   }
1415 
1416   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1417     if (flag & SOR_ZERO_INITIAL_GUESS) {
1418       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1419       its--;
1420     }
1421 
1422     while (its--) {
1423       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1425 
1426       /* update rhs: bb1 = bb - B*x */
1427       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1428       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1429 
1430       /* local sweep */
1431       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1432     }
1433   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1434     if (flag & SOR_ZERO_INITIAL_GUESS) {
1435       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1436       its--;
1437     }
1438     while (its--) {
1439       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1440       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1441 
1442       /* update rhs: bb1 = bb - B*x */
1443       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1444       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1445 
1446       /* local sweep */
1447       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1448     }
1449   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1450     if (flag & SOR_ZERO_INITIAL_GUESS) {
1451       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1452       its--;
1453     }
1454     while (its--) {
1455       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1456       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1457 
1458       /* update rhs: bb1 = bb - B*x */
1459       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1460       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1461 
1462       /* local sweep */
1463       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1464     }
1465   } else if (flag & SOR_EISENSTAT) {
1466     Vec xx1;
1467 
1468     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1469     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1470 
1471     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473     if (!mat->diag) {
1474       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1475       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1476     }
1477     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1478     if (hasop) {
1479       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1480     } else {
1481       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1482     }
1483     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1484 
1485     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1486 
1487     /* local sweep */
1488     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1489     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1490     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1491   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1492 
1493   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1494 
1495   matin->factorerrortype = mat->A->factorerrortype;
1496   PetscFunctionReturn(0);
1497 }
1498 
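/*
   Each local sweep above relaxes only the diagonal block: with x_g denoting the ghost
   values gathered into lvec, every iteration performs (a sketch of the update)

     bb1 = bb - B*x_g
     x   = SOR(A, bb1, omega, fshift, lits)

   so the off-diagonal block enters only through the right-hand side; a true parallel
   SOR that couples rows across processes is not supported and raises the error above.
*/
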
1499 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1500 {
1501   Mat            aA,aB,Aperm;
1502   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1503   PetscScalar    *aa,*ba;
1504   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1505   PetscSF        rowsf,sf;
1507   PetscBool      done;
1508   PetscErrorCode ierr;
1509 
1510   PetscFunctionBegin;
1511   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1512   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1513   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1514   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1515 
1516   /* Invert row permutation to find out where my rows should go */
1517   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1518   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1519   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1520   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1521   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1522   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1523 
1524   /* Invert column permutation to find out where my columns should go */
1525   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1526   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1527   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1528   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1529   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1531   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1532 
1533   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1534   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1535   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1536 
1537   /* Find out where my gcols should go */
1538   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1539   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1540   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1541   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1542   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1543   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1544   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1545   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1546 
1547   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1548   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1549   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1550   for (i=0; i<m; i++) {
1551     PetscInt    row = rdest[i];
1552     PetscMPIInt rowner;
1553     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1554     for (j=ai[i]; j<ai[i+1]; j++) {
1555       PetscInt    col = cdest[aj[j]];
1556       PetscMPIInt cowner;
1557       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1558       if (rowner == cowner) dnnz[i]++;
1559       else onnz[i]++;
1560     }
1561     for (j=bi[i]; j<bi[i+1]; j++) {
1562       PetscInt    col = gcdest[bj[j]];
1563       PetscMPIInt cowner;
1564       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1565       if (rowner == cowner) dnnz[i]++;
1566       else onnz[i]++;
1567     }
1568   }
1569   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1570   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1571   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1574 
1575   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1576   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1577   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1578   for (i=0; i<m; i++) {
1579     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1580     PetscInt j0,rowlen;
1581     rowlen = ai[i+1] - ai[i];
1582     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the work-array length m, so insert in batches */
1583       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1584       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1585     }
1586     rowlen = bi[i+1] - bi[i];
1587     for (j0=j=0; j<rowlen; j0=j) {
1588       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1589       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1590     }
1591   }
1592   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1593   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1594   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1595   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1596   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1597   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1598   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1599   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1600   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1602   *B = Aperm;
1603   PetscFunctionReturn(0);
1604 }
1605 
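/*
   A minimal usage sketch for the permutation above (a sketch only: rowp and colp must
   be index sets describing a global permutation, set up elsewhere, e.g. with
   ISCreateGeneral(); error checking omitted):

     IS  rowp,colp;
     Mat Aperm;

     MatPermute(A,rowp,colp,&Aperm);
     ... use Aperm ...
     MatDestroy(&Aperm);
*/
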
1606 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1607 {
1608   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1609   PetscErrorCode ierr;
1610 
1611   PetscFunctionBegin;
1612   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1613   if (ghosts) *ghosts = aij->garray;
1614   PetscFunctionReturn(0);
1615 }
1616 
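/*
   The ghost list returned above is garray: the global column numbers this process
   needs from other processes.  A minimal usage sketch (error checking omitted):

     PetscInt       nghosts;
     const PetscInt *ghosts;

     MatGetGhosts(mat,&nghosts,&ghosts);

   after which ghosts[0..nghosts-1] are the global indices of the off-process columns.
*/
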
1617 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1618 {
1619   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1620   Mat            A    = mat->A,B = mat->B;
1621   PetscErrorCode ierr;
1622   PetscLogDouble isend[5],irecv[5];
1623 
1624   PetscFunctionBegin;
1625   info->block_size = 1.0;
1626   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1627 
1628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1629   isend[3] = info->memory;  isend[4] = info->mallocs;
1630 
1631   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1632 
1633   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;  isend[4] += info->mallocs;
1635   if (flag == MAT_LOCAL) {
1636     info->nz_used      = isend[0];
1637     info->nz_allocated = isend[1];
1638     info->nz_unneeded  = isend[2];
1639     info->memory       = isend[3];
1640     info->mallocs      = isend[4];
1641   } else if (flag == MAT_GLOBAL_MAX) {
1642     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1643 
1644     info->nz_used      = irecv[0];
1645     info->nz_allocated = irecv[1];
1646     info->nz_unneeded  = irecv[2];
1647     info->memory       = irecv[3];
1648     info->mallocs      = irecv[4];
1649   } else if (flag == MAT_GLOBAL_SUM) {
1650     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1651 
1652     info->nz_used      = irecv[0];
1653     info->nz_allocated = irecv[1];
1654     info->nz_unneeded  = irecv[2];
1655     info->memory       = irecv[3];
1656     info->mallocs      = irecv[4];
1657   }
1658   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1659   info->fill_ratio_needed = 0;
1660   info->factor_mallocs    = 0;
1661   PetscFunctionReturn(0);
1662 }
1663 
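/*
   A minimal usage sketch for the info collection above (error checking omitted):

     MatInfo info;

     MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
     PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);

   MAT_LOCAL reports only this process; MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce the
   five counters (nz_used, nz_allocated, nz_unneeded, memory, mallocs) across the
   matrix's communicator, as implemented above.
*/
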
1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1665 {
1666   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1667   PetscErrorCode ierr;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678   case MAT_FORM_EXPLICIT_TRANSPOSE:
1679     MatCheckPreallocated(A,1);
1680     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1681     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     MatCheckPreallocated(A,1);
1685     a->roworiented = flg;
1686 
1687     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1688     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1689     break;
1690   case MAT_FORCE_DIAGONAL_ENTRIES:
1691   case MAT_SORTED_FULL:
1692     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1693     break;
1694   case MAT_IGNORE_OFF_PROC_ENTRIES:
1695     a->donotstash = flg;
1696     break;
1697   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1698   case MAT_SPD:
1699   case MAT_SYMMETRIC:
1700   case MAT_STRUCTURALLY_SYMMETRIC:
1701   case MAT_HERMITIAN:
1702   case MAT_SYMMETRY_ETERNAL:
1703     break;
1704   case MAT_SUBMAT_SINGLEIS:
1705     A->submat_singleis = flg;
1706     break;
1707   case MAT_STRUCTURE_ONLY:
1708     /* The option is handled directly by MatSetOption() */
1709     break;
1710   default:
1711     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1712   }
1713   PetscFunctionReturn(0);
1714 }
1715 
1716 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1717 {
1718   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1719   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1720   PetscErrorCode ierr;
1721   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1722   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1723   PetscInt       *cmap,*idx_p;
1724 
1725   PetscFunctionBegin;
1726   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1727   mat->getrowactive = PETSC_TRUE;
1728 
1729   if (!mat->rowvalues && (idx || v)) {
1730     /*
1731         allocate enough space to hold information from the longest row.
1732     */
1733     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1734     PetscInt   max = 1,tmp;
1735     for (i=0; i<matin->rmap->n; i++) {
1736       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1737       if (max < tmp) max = tmp;
1738     }
1739     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1740   }
1741 
1742   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1743   lrow = row - rstart;
1744 
1745   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1746   if (!v)   {pvA = NULL; pvB = NULL;}
1747   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1748   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1749   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1750   nztot = nzA + nzB;
1751 
1752   cmap = mat->garray;
1753   if (v  || idx) {
1754     if (nztot) {
1755       /* Merge into increasing global column order, assuming A and B are each already sorted */
1756       PetscInt imark = -1;
1757       if (v) {
1758         *v = v_p = mat->rowvalues;
1759         for (i=0; i<nzB; i++) {
1760           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1761           else break;
1762         }
1763         imark = i;
1764         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1765         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1766       }
1767       if (idx) {
1768         *idx = idx_p = mat->rowindices;
1769         if (imark > -1) {
1770           for (i=0; i<imark; i++) {
1771             idx_p[i] = cmap[cworkB[i]];
1772           }
1773         } else {
1774           for (i=0; i<nzB; i++) {
1775             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1776             else break;
1777           }
1778           imark = i;
1779         }
1780         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1781         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1782       }
1783     } else {
1784       if (idx) *idx = NULL;
1785       if (v)   *v   = NULL;
1786     }
1787   }
1788   *nz  = nztot;
1789   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1790   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1795 {
1796   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1797 
1798   PetscFunctionBegin;
1799   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1800   aij->getrowactive = PETSC_FALSE;
1801   PetscFunctionReturn(0);
1802 }
1803 
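/*
   The getrow/restorerow pair above is normally driven as follows (a sketch; only
   locally owned rows may be requested, and error checking is omitted):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     MatGetOwnershipRange(mat,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(mat,row,&ncols,&cols,&vals);
       ... use cols[] and vals[] ...
       MatRestoreRow(mat,row,&ncols,&cols,&vals);
     }
*/
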
1804 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1805 {
1806   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1807   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1808   PetscErrorCode ierr;
1809   PetscInt       i,j,cstart = mat->cmap->rstart;
1810   PetscReal      sum = 0.0;
1811   MatScalar      *v;
1812 
1813   PetscFunctionBegin;
1814   if (aij->size == 1) {
1815     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1816   } else {
1817     if (type == NORM_FROBENIUS) {
1818       v = amat->a;
1819       for (i=0; i<amat->nz; i++) {
1820         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1821       }
1822       v = bmat->a;
1823       for (i=0; i<bmat->nz; i++) {
1824         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1825       }
1826       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1827       *norm = PetscSqrtReal(*norm);
1828       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1829     } else if (type == NORM_1) { /* max column norm */
1830       PetscReal *tmp,*tmp2;
1831       PetscInt  *jj,*garray = aij->garray;
1832       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1833       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1834       *norm = 0.0;
1835       v     = amat->a; jj = amat->j;
1836       for (j=0; j<amat->nz; j++) {
1837         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1838       }
1839       v = bmat->a; jj = bmat->j;
1840       for (j=0; j<bmat->nz; j++) {
1841         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1842       }
1843       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1844       for (j=0; j<mat->cmap->N; j++) {
1845         if (tmp2[j] > *norm) *norm = tmp2[j];
1846       }
1847       ierr = PetscFree(tmp);CHKERRQ(ierr);
1848       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1849       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1850     } else if (type == NORM_INFINITY) { /* max row norm */
1851       PetscReal ntemp = 0.0;
1852       for (j=0; j<aij->A->rmap->n; j++) {
1853         v   = amat->a + amat->i[j];
1854         sum = 0.0;
1855         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1856           sum += PetscAbsScalar(*v); v++;
1857         }
1858         v = bmat->a + bmat->i[j];
1859         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1860           sum += PetscAbsScalar(*v); v++;
1861         }
1862         if (sum > ntemp) ntemp = sum;
1863       }
1864       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1865       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1866     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
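/*
   The norms computed above are, for entries a_ij of the global matrix:

     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)   (local sums of squares, then MPIU_SUM)
     NORM_1:         max_j sum_i |a_ij|      (maximum column sum)
     NORM_INFINITY:  max_i sum_j |a_ij|      (maximum row sum)

   with the diagonal (A) and off-diagonal (B) blocks each contributing their stored
   entries; NORM_2 is not supported since it would require an eigenvalue solve.
*/
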
1871 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1872 {
1873   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1874   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1875   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1876   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1877   PetscErrorCode  ierr;
1878   Mat             B,A_diag,*B_diag;
1879   const MatScalar *pbv,*bv;
1880 
1881   PetscFunctionBegin;
1882   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1883   ai = Aloc->i; aj = Aloc->j;
1884   bi = Bloc->i; bj = Bloc->j;
1885   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1886     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1887     PetscSFNode          *oloc;
1888     PETSC_UNUSED PetscSF sf;
1889 
1890     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1891     /* compute d_nnz for preallocation */
1892     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1893     for (i=0; i<ai[ma]; i++) {
1894       d_nnz[aj[i]]++;
1895     }
1896     /* compute local off-diagonal contributions */
1897     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1898     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1899     /* map those to global */
1900     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1901     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1902     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1903     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1904     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1905     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1906     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1907 
1908     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1909     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1910     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1911     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1912     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1913     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1914   } else {
1915     B    = *matout;
1916     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1917   }
1918 
1919   b           = (Mat_MPIAIJ*)B->data;
1920   A_diag      = a->A;
1921   B_diag      = &b->A;
1922   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1923   A_diag_ncol = A_diag->cmap->N;
1924   B_diag_ilen = sub_B_diag->ilen;
1925   B_diag_i    = sub_B_diag->i;
1926 
1927   /* Set ilen for diagonal of B */
1928   for (i=0; i<A_diag_ncol; i++) {
1929     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1930   }
1931 
1932   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1933      quickly (without using MatSetValues()) because all writes are local. */
1934   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1935 
1936   /* copy over the B part */
1937   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1938   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1939   pbv  = bv;
1940   row  = A->rmap->rstart;
1941   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1942   cols_tmp = cols;
1943   for (i=0; i<mb; i++) {
1944     ncol = bi[i+1]-bi[i];
1945     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1946     row++;
1947     pbv += ncol; cols_tmp += ncol;
1948   }
1949   ierr = PetscFree(cols);CHKERRQ(ierr);
1950   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1951 
1952   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1955     *matout = B;
1956   } else {
1957     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
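/*
   A minimal usage sketch of the transpose above (error checking omitted):

     Mat At;

     MatTranspose(A,MAT_INITIAL_MATRIX,&At);   creates At = A^T
     MatTranspose(A,MAT_REUSE_MATRIX,&At);     refills At after A's values change
     MatTranspose(A,MAT_INPLACE_MATRIX,&A);    replaces A by A^T

   MAT_REUSE_MATRIX requires the nonzero structure of At to match the earlier call.
*/
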
1962 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1963 {
1964   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1965   Mat            a    = aij->A,b = aij->B;
1966   PetscErrorCode ierr;
1967   PetscInt       s1,s2,s3;
1968 
1969   PetscFunctionBegin;
1970   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1971   if (rr) {
1972     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1973     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1974     /* Overlap communication with computation. */
1975     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1976   }
1977   if (ll) {
1978     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1979     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1980     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1981   }
1982   /* scale the diagonal block */
1983   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1984 
1985   if (rr) {
1986     /* Do a scatter end and then right scale the off-diagonal block */
1987     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1988     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
1989   }
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1994 {
1995   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1996   PetscErrorCode ierr;
1997 
1998   PetscFunctionBegin;
1999   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2004 {
2005   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2006   Mat            a,b,c,d;
2007   PetscBool      flg;
2008   PetscErrorCode ierr;
2009 
2010   PetscFunctionBegin;
2011   a = matA->A; b = matA->B;
2012   c = matB->A; d = matB->B;
2013 
2014   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2015   if (flg) {
2016     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2017   }
2018   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2023 {
2024   PetscErrorCode ierr;
2025   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2026   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2027 
2028   PetscFunctionBegin;
2029   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2030   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2031     /* because of the column compression in the off-processor part of the matrix a->B,
2032        the number of columns in a->B and b->B may be different, hence we cannot call
2033        the MatCopy() directly on the two parts. If need be, we can provide a more
2034        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2035        then copying the submatrices */
2036     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2037   } else {
2038     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2039     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2040   }
2041   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2046 {
2047   PetscErrorCode ierr;
2048 
2049   PetscFunctionBegin;
2050   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 /*
2055    Computes the number of nonzeros per row needed for preallocation when X and Y
2056    have different nonzero structure.
2057 */
2058 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2059 {
2060   PetscInt       i,j,k,nzx,nzy;
2061 
2062   PetscFunctionBegin;
2063   /* Set the number of nonzeros in the new matrix */
2064   for (i=0; i<m; i++) {
2065     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2066     nzx = xi[i+1] - xi[i];
2067     nzy = yi[i+1] - yi[i];
2068     nnz[i] = 0;
2069     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2070       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2071       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2072       nnz[i]++;
2073     }
2074     for (; k<nzy; k++) nnz[i]++;
2075   }
2076   PetscFunctionReturn(0);
2077 }
2078 
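/*
   Worked example: if row i of X has global columns {0,3,5} and row i of Y has {3,4},
   the merged structure is {0,3,4,5} and nnz[i] = 4; the duplicate column 3 is counted
   once via the k++ skip, and Y's column 4 is picked up by the catch-up loop before
   X's column 5.
*/
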
2079 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2080 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2081 {
2082   PetscErrorCode ierr;
2083   PetscInt       m = Y->rmap->N;
2084   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2085   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2086 
2087   PetscFunctionBegin;
2088   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2093 {
2094   PetscErrorCode ierr;
2095   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2096 
2097   PetscFunctionBegin;
2098   if (str == SAME_NONZERO_PATTERN) {
2099     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2100     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2101   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2102     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2103   } else {
2104     Mat      B;
2105     PetscInt *nnz_d,*nnz_o;
2106 
2107     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2108     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2109     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2110     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2111     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2112     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2113     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2114     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2115     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2116     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2117     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2118     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2119     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
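/*
   A minimal usage sketch for the update above, which computes Y = Y + a*X (error
   checking omitted):

     MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);

   SAME_NONZERO_PATTERN updates the diagonal and off-diagonal blocks directly,
   SUBSET_NONZERO_PATTERN dispatches to MatAXPY_Basic(), and any other structure takes
   the preallocate-and-merge path above that builds a fresh matrix B.
*/
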
2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2125 
2126 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2127 {
2128 #if defined(PETSC_USE_COMPLEX)
2129   PetscErrorCode ierr;
2130   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2131 
2132   PetscFunctionBegin;
2133   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2134   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2135 #else
2136   PetscFunctionBegin;
2137 #endif
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2148   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2153 {
2154   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2155   PetscErrorCode ierr;
2156 
2157   PetscFunctionBegin;
2158   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2159   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2164 {
2165   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2166   PetscErrorCode    ierr;
2167   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2168   PetscScalar       *va,*vv;
2169   Vec               vB,vA;
2170   const PetscScalar *vb;
2171 
2172   PetscFunctionBegin;
2173   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2174   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2175 
2176   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2177   if (idx) {
2178     for (i=0; i<m; i++) {
2179       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2180     }
2181   }
2182 
2183   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2184   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2185   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2186 
2187   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2188   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2189   for (i=0; i<m; i++) {
2190     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2191       vv[i] = vb[i];
2192       if (idx) idx[i] = a->garray[idxb[i]];
2193     } else {
2194       vv[i] = va[i];
2195       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2196         idx[i] = a->garray[idxb[i]];
2197     }
2198   }
2199   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2200   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2201   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2202   ierr = PetscFree(idxb);CHKERRQ(ierr);
2203   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2204   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2209 {
2210   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2211   PetscInt          m = A->rmap->n,n = A->cmap->n;
2212   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2213   PetscInt          *cmap  = mat->garray;
2214   PetscInt          *diagIdx, *offdiagIdx;
2215   Vec               diagV, offdiagV;
2216   PetscScalar       *a, *diagA, *offdiagA;
2217   const PetscScalar *ba,*bav;
2218   PetscInt          r,j,col,ncols,*bi,*bj;
2219   PetscErrorCode    ierr;
2220   Mat               B = mat->B;
2221   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2222 
2223   PetscFunctionBegin;
2224   /* When this process owns every column, all of its entries lie in the diagonal block */
2225   if (A->cmap->N == n) {
2226     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2227     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2228     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2229     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2230     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2231     PetscFunctionReturn(0);
2232   } else if (n == 0) {
2233     if (m) {
2234       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2235       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2236       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2237     }
2238     PetscFunctionReturn(0);
2239   }
2240 
2241   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2242   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2243   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2244   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2245 
2246   /* Get offdiagIdx[] for implicit 0.0 */
2247   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2248   ba   = bav;
2249   bi   = b->i;
2250   bj   = b->j;
2251   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2252   for (r = 0; r < m; r++) {
2253     ncols = bi[r+1] - bi[r];
2254     if (ncols == A->cmap->N - n) { /* Brow is dense */
2255       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2256     } else { /* Brow is sparse, so we already know the minimum absolute value is 0.0, attained at an implicit zero */
2257       offdiagA[r] = 0.0;
2258 
2259       /* Find first hole in the cmap */
2260       for (j=0; j<ncols; j++) {
2261         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2262         if (col > j && j < cstart) {
2263           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2264           break;
2265         } else if (col > j + n && j >= cstart) {
2266           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2267           break;
2268         }
2269       }
2270       if (j == ncols && ncols < A->cmap->N - n) {
2271         /* a hole is outside compressed Bcols */
2272         if (ncols == 0) {
2273           if (cstart) {
2274             offdiagIdx[r] = 0;
2275           } else offdiagIdx[r] = cend;
2276         } else { /* ncols > 0 */
2277           offdiagIdx[r] = cmap[ncols-1] + 1;
2278           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2279         }
2280       }
2281     }
2282 
2283     for (j=0; j<ncols; j++) {
2284       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2285       ba++; bj++;
2286     }
2287   }
2288 
2289   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2290   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2291   for (r = 0; r < m; ++r) {
2292     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2293       a[r]   = diagA[r];
2294       if (idx) idx[r] = cstart + diagIdx[r];
2295     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2296       a[r] = diagA[r];
2297       if (idx) {
2298         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2299           idx[r] = cstart + diagIdx[r];
2300         } else idx[r] = offdiagIdx[r];
2301       }
2302     } else {
2303       a[r]   = offdiagA[r];
2304       if (idx) idx[r] = offdiagIdx[r];
2305     }
2306   }
2307   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2308   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2309   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2310   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2311   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2312   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2313   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
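/*
   Worked example (illustrative numbers) of the "first hole in the cmap" search used by
   the row min/max routines here: with cstart = 2, n = 2 (local columns 2..3) and a row
   whose compressed B columns map to cmap[bj[]] = {0,5}, the loop sees col = 0 at j = 0
   (no hole yet) and col = 5 at j = 1, where col > j and j < cstart, so the first
   implicit 0.0 of that row is at global column 1.
*/
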
2317 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2318 {
2319   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2320   PetscInt          m = A->rmap->n,n = A->cmap->n;
2321   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2322   PetscInt          *cmap  = mat->garray;
2323   PetscInt          *diagIdx, *offdiagIdx;
2324   Vec               diagV, offdiagV;
2325   PetscScalar       *a, *diagA, *offdiagA;
2326   const PetscScalar *ba,*bav;
2327   PetscInt          r,j,col,ncols,*bi,*bj;
2328   PetscErrorCode    ierr;
2329   Mat               B = mat->B;
2330   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2331 
2332   PetscFunctionBegin;
2333   /* When this process owns every column, all of its entries lie in the diagonal block */
2334   if (A->cmap->N == n) {
2335     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2336     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2337     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2338     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2339     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2340     PetscFunctionReturn(0);
2341   } else if (n == 0) {
2342     if (m) {
2343       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2344       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2345       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2346     }
2347     PetscFunctionReturn(0);
2348   }
2349 
2350   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2351   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2352   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2353   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2354 
2355   /* Get offdiagIdx[] for implicit 0.0 */
2356   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2357   ba   = bav;
2358   bi   = b->i;
2359   bj   = b->j;
2360   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2361   for (r = 0; r < m; r++) {
2362     ncols = bi[r+1] - bi[r];
2363     if (ncols == A->cmap->N - n) { /* Brow is dense */
2364       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2365     } else { /* Brow is sparse, so we already know the minimum is 0.0 or lower */
2366       offdiagA[r] = 0.0;
2367 
2368       /* Find first hole in the cmap */
2369       for (j=0; j<ncols; j++) {
2370         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2371         if (col > j && j < cstart) {
2372           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2373           break;
2374         } else if (col > j + n && j >= cstart) {
2375           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2376           break;
2377         }
2378       }
2379       if (j == ncols && ncols < A->cmap->N - n) {
2380         /* a hole is outside compressed Bcols */
2381         if (ncols == 0) {
2382           if (cstart) {
2383             offdiagIdx[r] = 0;
2384           } else offdiagIdx[r] = cend;
2385         } else { /* ncols > 0 */
2386           offdiagIdx[r] = cmap[ncols-1] + 1;
2387           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2388         }
2389       }
2390     }
2391 
2392     for (j=0; j<ncols; j++) {
2393       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2394       ba++; bj++;
2395     }
2396   }
2397 
2398   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2399   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2400   for (r = 0; r < m; ++r) {
2401     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2402       a[r]   = diagA[r];
2403       if (idx) idx[r] = cstart + diagIdx[r];
2404     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2405       a[r] = diagA[r];
2406       if (idx) {
2407         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2408           idx[r] = cstart + diagIdx[r];
2409         } else idx[r] = offdiagIdx[r];
2410       }
2411     } else {
2412       a[r]   = offdiagA[r];
2413       if (idx) idx[r] = offdiagIdx[r];
2414     }
2415   }
2416   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2417   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2418   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2419   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2420   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2421   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2422   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2427 {
2428   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2429   PetscInt          m = A->rmap->n,n = A->cmap->n;
2430   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2431   PetscInt          *cmap  = mat->garray;
2432   PetscInt          *diagIdx, *offdiagIdx;
2433   Vec               diagV, offdiagV;
2434   PetscScalar       *a, *diagA, *offdiagA;
2435   const PetscScalar *ba,*bav;
2436   PetscInt          r,j,col,ncols,*bi,*bj;
2437   PetscErrorCode    ierr;
2438   Mat               B = mat->B;
2439   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2440 
2441   PetscFunctionBegin;
2442   /* When this process owns every column, all of its entries lie in the diagonal block */
2443   if (A->cmap->N == n) {
2444     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2445     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2446     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2447     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2448     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2449     PetscFunctionReturn(0);
2450   } else if (n == 0) {
2451     if (m) {
2452       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2453       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2454       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2455     }
2456     PetscFunctionReturn(0);
2457   }
2458 
2459   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2460   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2461   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2462   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2463 
2464   /* Get offdiagIdx[] for implicit 0.0 */
2465   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2466   ba   = bav;
2467   bi   = b->i;
2468   bj   = b->j;
2469   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2470   for (r = 0; r < m; r++) {
2471     ncols = bi[r+1] - bi[r];
2472     if (ncols == A->cmap->N - n) { /* Brow is dense */
2473       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2474     } else { /* Brow is sparse, so we already know the maximum is 0.0 or higher */
2475       offdiagA[r] = 0.0;
2476 
2477       /* Find first hole in the cmap */
2478       for (j=0; j<ncols; j++) {
2479         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2480         if (col > j && j < cstart) {
2481           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2482           break;
2483         } else if (col > j + n && j >= cstart) {
2484           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2485           break;
2486         }
2487       }
2488       if (j == ncols && ncols < A->cmap->N - n) {
2489         /* a hole is outside compressed Bcols */
2490         if (ncols == 0) {
2491           if (cstart) {
2492             offdiagIdx[r] = 0;
2493           } else offdiagIdx[r] = cend;
2494         } else { /* ncols > 0 */
2495           offdiagIdx[r] = cmap[ncols-1] + 1;
2496           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2497         }
2498       }
2499     }
2500 
2501     for (j=0; j<ncols; j++) {
2502       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2503       ba++; bj++;
2504     }
2505   }
2506 
2507   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2508   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2509   for (r = 0; r < m; ++r) {
2510     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2511       a[r] = diagA[r];
2512       if (idx) idx[r] = cstart + diagIdx[r];
2513     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2514       a[r] = diagA[r];
2515       if (idx) {
2516         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2517           idx[r] = cstart + diagIdx[r];
2518         } else idx[r] = offdiagIdx[r];
2519       }
2520     } else {
2521       a[r] = offdiagA[r];
2522       if (idx) idx[r] = offdiagIdx[r];
2523     }
2524   }
2525   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2526   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2527   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2528   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2529   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2530   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2531   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2536 {
2537   PetscErrorCode ierr;
2538   Mat            *dummy;
2539 
2540   PetscFunctionBegin;
2541   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2542   *newmat = *dummy;
2543   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2548 {
2549   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2550   PetscErrorCode ierr;
2551 
2552   PetscFunctionBegin;
2553   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2554   A->factorerrortype = a->A->factorerrortype;
2555   PetscFunctionReturn(0);
2556 }
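
/*
   A minimal usage sketch for the wrapper above (an illustration; A is assumed to be an
   assembled MATMPIAIJ with its block size set). The returned array holds the inverses of
   the point-block diagonal and is owned by the matrix:

     const PetscScalar *values;
     ierr = MatInvertBlockDiagonal(A,&values);CHKERRQ(ierr);
*/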
2557 
2558 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2559 {
2560   PetscErrorCode ierr;
2561   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2562 
2563   PetscFunctionBegin;
2564   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2565   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2566   if (x->assembled) {
2567     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2568   } else {
2569     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2570   }
2571   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2572   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2577 {
2578   PetscFunctionBegin;
2579   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2580   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /*@
2585    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2586 
2587    Collective on Mat
2588 
2589    Input Parameters:
2590 +    A - the matrix
2591 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (by default it is not used)
2592 
2593    Level: advanced
2594 
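   Example usage (a minimal sketch; A is assumed to be an existing MATMPIAIJ matrix):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve
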
2595 @*/
2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2597 {
2598   PetscErrorCode       ierr;
2599 
2600   PetscFunctionBegin;
2601   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2606 {
2607   PetscErrorCode       ierr;
2608   PetscBool            sc = PETSC_FALSE,flg;
2609 
2610   PetscFunctionBegin;
2611   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2612   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2613   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2614   if (flg) {
2615     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2616   }
2617   ierr = PetscOptionsTail();CHKERRQ(ierr);
2618   PetscFunctionReturn(0);
2619 }
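
/*
   The option registered above is read during MatSetFromOptions(). A hypothetical command
   line (the executable name is an assumption):

     ./app -mat_type mpiaij -mat_increase_overlap_scalable

   makes subsequent MatIncreaseOverlap() calls use MatIncreaseOverlap_MPIAIJ_Scalable().
*/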
2620 
2621 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2622 {
2623   PetscErrorCode ierr;
2624   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2625   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2626 
2627   PetscFunctionBegin;
2628   if (!Y->preallocated) {
2629     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2630   } else if (!aij->nz) {
2631     PetscInt nonew = aij->nonew;
2632     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2633     aij->nonew = nonew;
2634   }
2635   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2636   PetscFunctionReturn(0);
2637 }
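
/*
   A usage sketch for the shift above (the value 2.0 is illustrative): MatShift() computes
   Y = Y + a*I, and the preallocation fixup above ensures there is room for the diagonal
   entries before MatShift_Basic() inserts them.

     ierr = MatShift(Y,(PetscScalar)2.0);CHKERRQ(ierr);
*/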
2638 
2639 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2640 {
2641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2642   PetscErrorCode ierr;
2643 
2644   PetscFunctionBegin;
2645   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2646   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2647   if (d) {
2648     PetscInt rstart;
2649     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2650     *d += rstart;
2651 
2652   }
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2657 {
2658   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2659   PetscErrorCode ierr;
2660 
2661   PetscFunctionBegin;
2662   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2663   PetscFunctionReturn(0);
2664 }
2665 
2666 /* -------------------------------------------------------------------*/
2667 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2668                                        MatGetRow_MPIAIJ,
2669                                        MatRestoreRow_MPIAIJ,
2670                                        MatMult_MPIAIJ,
2671                                 /* 4*/ MatMultAdd_MPIAIJ,
2672                                        MatMultTranspose_MPIAIJ,
2673                                        MatMultTransposeAdd_MPIAIJ,
2674                                        NULL,
2675                                        NULL,
2676                                        NULL,
2677                                 /*10*/ NULL,
2678                                        NULL,
2679                                        NULL,
2680                                        MatSOR_MPIAIJ,
2681                                        MatTranspose_MPIAIJ,
2682                                 /*15*/ MatGetInfo_MPIAIJ,
2683                                        MatEqual_MPIAIJ,
2684                                        MatGetDiagonal_MPIAIJ,
2685                                        MatDiagonalScale_MPIAIJ,
2686                                        MatNorm_MPIAIJ,
2687                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2688                                        MatAssemblyEnd_MPIAIJ,
2689                                        MatSetOption_MPIAIJ,
2690                                        MatZeroEntries_MPIAIJ,
2691                                 /*24*/ MatZeroRows_MPIAIJ,
2692                                        NULL,
2693                                        NULL,
2694                                        NULL,
2695                                        NULL,
2696                                 /*29*/ MatSetUp_MPIAIJ,
2697                                        NULL,
2698                                        NULL,
2699                                        MatGetDiagonalBlock_MPIAIJ,
2700                                        NULL,
2701                                 /*34*/ MatDuplicate_MPIAIJ,
2702                                        NULL,
2703                                        NULL,
2704                                        NULL,
2705                                        NULL,
2706                                 /*39*/ MatAXPY_MPIAIJ,
2707                                        MatCreateSubMatrices_MPIAIJ,
2708                                        MatIncreaseOverlap_MPIAIJ,
2709                                        MatGetValues_MPIAIJ,
2710                                        MatCopy_MPIAIJ,
2711                                 /*44*/ MatGetRowMax_MPIAIJ,
2712                                        MatScale_MPIAIJ,
2713                                        MatShift_MPIAIJ,
2714                                        MatDiagonalSet_MPIAIJ,
2715                                        MatZeroRowsColumns_MPIAIJ,
2716                                 /*49*/ MatSetRandom_MPIAIJ,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2722                                        NULL,
2723                                        MatSetUnfactored_MPIAIJ,
2724                                        MatPermute_MPIAIJ,
2725                                        NULL,
2726                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2727                                        MatDestroy_MPIAIJ,
2728                                        MatView_MPIAIJ,
2729                                        NULL,
2730                                        NULL,
2731                                 /*64*/ NULL,
2732                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2733                                        NULL,
2734                                        NULL,
2735                                        NULL,
2736                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2737                                        MatGetRowMinAbs_MPIAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                        NULL,
2742                                 /*75*/ MatFDColoringApply_AIJ,
2743                                        MatSetFromOptions_MPIAIJ,
2744                                        NULL,
2745                                        NULL,
2746                                        MatFindZeroDiagonals_MPIAIJ,
2747                                 /*80*/ NULL,
2748                                        NULL,
2749                                        NULL,
2750                                 /*83*/ MatLoad_MPIAIJ,
2751                                        MatIsSymmetric_MPIAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                 /*89*/ NULL,
2757                                        NULL,
2758                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2762                                        NULL,
2763                                        NULL,
2764                                        NULL,
2765                                        MatBindToCPU_MPIAIJ,
2766                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2767                                        NULL,
2768                                        NULL,
2769                                        MatConjugate_MPIAIJ,
2770                                        NULL,
2771                                 /*104*/MatSetValuesRow_MPIAIJ,
2772                                        MatRealPart_MPIAIJ,
2773                                        MatImaginaryPart_MPIAIJ,
2774                                        NULL,
2775                                        NULL,
2776                                 /*109*/NULL,
2777                                        NULL,
2778                                        MatGetRowMin_MPIAIJ,
2779                                        NULL,
2780                                        MatMissingDiagonal_MPIAIJ,
2781                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2782                                        NULL,
2783                                        MatGetGhosts_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2787                                        NULL,
2788                                        NULL,
2789                                        NULL,
2790                                        MatGetMultiProcBlock_MPIAIJ,
2791                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2792                                        MatGetColumnNorms_MPIAIJ,
2793                                        MatInvertBlockDiagonal_MPIAIJ,
2794                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2795                                        MatCreateSubMatricesMPI_MPIAIJ,
2796                                 /*129*/NULL,
2797                                        NULL,
2798                                        NULL,
2799                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2800                                        NULL,
2801                                 /*134*/NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                 /*139*/MatSetBlockSizes_MPIAIJ,
2807                                        NULL,
2808                                        NULL,
2809                                        MatFDColoringSetUp_MPIXAIJ,
2810                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2811                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2812                                 /*145*/NULL,
2813                                        NULL,
2814                                        NULL
2815 };
2816 
2817 /* ----------------------------------------------------------------------------------------*/
2818 
2819 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2820 {
2821   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2822   PetscErrorCode ierr;
2823 
2824   PetscFunctionBegin;
2825   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2826   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2831 {
2832   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2833   PetscErrorCode ierr;
2834 
2835   PetscFunctionBegin;
2836   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2837   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
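
/*
   A usage sketch for the store/retrieve pair above (per the MatStoreValues() contract,
   the nonzero pattern must be frozen first):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... modify matrix values, e.g. inside a nonlinear iteration ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/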
2840 
2841 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2842 {
2843   Mat_MPIAIJ     *b;
2844   PetscErrorCode ierr;
2845   PetscMPIInt    size;
2846 
2847   PetscFunctionBegin;
2848   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2849   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2850   b = (Mat_MPIAIJ*)B->data;
2851 
2852 #if defined(PETSC_USE_CTABLE)
2853   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2854 #else
2855   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2856 #endif
2857   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2858   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2859   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2860 
2861   /* Because B may have been resized, we simply destroy it and create a new one each time */
2862   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2863   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2864   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2865   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2866   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2867   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2868   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2869 
2870   if (!B->preallocated) {
2871     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2872     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2873     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2874     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2875     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2876   }
2877 
2878   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2879   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2880   B->preallocated  = PETSC_TRUE;
2881   B->was_assembled = PETSC_FALSE;
2882   B->assembled     = PETSC_FALSE;
2883   PetscFunctionReturn(0);
2884 }
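
/*
   A minimal sketch of how the routine above is typically reached (the matrix sizes and the
   per-row estimates 5 and 2 are illustrative assumptions); the public
   MatMPIAIJSetPreallocation() dispatches here through PetscTryMethod():

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/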
2885 
2886 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2887 {
2888   Mat_MPIAIJ     *b;
2889   PetscErrorCode ierr;
2890 
2891   PetscFunctionBegin;
2892   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2893   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2894   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2895   b = (Mat_MPIAIJ*)B->data;
2896 
2897 #if defined(PETSC_USE_CTABLE)
2898   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2899 #else
2900   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2901 #endif
2902   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2903   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2904   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2905 
2906   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2907   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2908   B->preallocated  = PETSC_TRUE;
2909   B->was_assembled = PETSC_FALSE;
2910   B->assembled = PETSC_FALSE;
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2915 {
2916   Mat            mat;
2917   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2918   PetscErrorCode ierr;
2919 
2920   PetscFunctionBegin;
2921   *newmat = NULL;
2922   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2923   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2924   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2925   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2926   a       = (Mat_MPIAIJ*)mat->data;
2927 
2928   mat->factortype   = matin->factortype;
2929   mat->assembled    = matin->assembled;
2930   mat->insertmode   = NOT_SET_VALUES;
2931   mat->preallocated = matin->preallocated;
2932 
2933   a->size         = oldmat->size;
2934   a->rank         = oldmat->rank;
2935   a->donotstash   = oldmat->donotstash;
2936   a->roworiented  = oldmat->roworiented;
2937   a->rowindices   = NULL;
2938   a->rowvalues    = NULL;
2939   a->getrowactive = PETSC_FALSE;
2940 
2941   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2942   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2943 
2944   if (oldmat->colmap) {
2945 #if defined(PETSC_USE_CTABLE)
2946     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2947 #else
2948     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2949     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2950     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2951 #endif
2952   } else a->colmap = NULL;
2953   if (oldmat->garray) {
2954     PetscInt len;
2955     len  = oldmat->B->cmap->n;
2956     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2957     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2958     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2959   } else a->garray = NULL;
2960 
2961   /* It may happen that MatDuplicate is called with a non-assembled matrix;
2962      in fact, MatDuplicate only requires the matrix to be preallocated.
2963      This may happen inside a DMCreateMatrix_Shell */
2964   if (oldmat->lvec) {
2965     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2966     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2967   }
2968   if (oldmat->Mvctx) {
2969     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2970     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2971   }
2972   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2973   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2974   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2975   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2976   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2977   *newmat = mat;
2978   PetscFunctionReturn(0);
2979 }
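
/*
   As the comment above notes, MatDuplicate() only requires the source matrix to be
   preallocated, not assembled. A minimal usage sketch (A is assumed to be an existing
   MATMPIAIJ matrix):

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/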
2980 
2981 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2982 {
2983   PetscBool      isbinary, ishdf5;
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2988   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2989   /* force binary viewer to load .info file if it has not yet done so */
2990   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2991   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2992   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2993   if (isbinary) {
2994     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2995   } else if (ishdf5) {
2996 #if defined(PETSC_HAVE_HDF5)
2997     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2998 #else
2999     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3000 #endif
3001   } else {
3002     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3003   }
3004   PetscFunctionReturn(0);
3005 }
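
/*
   A sketch of driving the loader above (the file name "matrix.dat" is an assumption); the
   viewer type chooses between the binary and HDF5 branches:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/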
3006 
3007 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3008 {
3009   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3010   PetscInt       *rowidxs,*colidxs;
3011   PetscScalar    *matvals;
3012   PetscErrorCode ierr;
3013 
3014   PetscFunctionBegin;
3015   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3016 
3017   /* read in matrix header */
3018   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3019   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3020   M  = header[1]; N = header[2]; nz = header[3];
3021   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3022   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3023   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3024 
3025   /* set block sizes from the viewer's .info file */
3026   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3027   /* set global sizes if not set already */
3028   if (mat->rmap->N < 0) mat->rmap->N = M;
3029   if (mat->cmap->N < 0) mat->cmap->N = N;
3030   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3031   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3032 
3033   /* check if the matrix sizes are correct */
3034   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3035   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file has different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3036 
3037   /* read in row lengths and build row indices */
3038   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3039   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3040   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3041   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3042   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3043   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
3044   /* read in column indices and matrix values */
3045   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3046   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3047   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3048   /* store matrix indices and values */
3049   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3050   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3051   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
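
/*
   The reads above follow the standard PETSc binary matrix layout: a four-entry header
   {MAT_FILE_CLASSID, M, N, nz}, then the nonzero count of each row, then all column
   indices, then all values. A small worked example (illustrative, not from a real file):
   for the 2 x 3 matrix [1 0 2; 0 3 0],

     header      = {MAT_FILE_CLASSID, 2, 3, 3}
     row lengths = {2, 1}   (the prefix sum above gives rowidxs = {0, 2, 3})
     colidxs     = {0, 2, 1}
     matvals     = {1.0, 2.0, 3.0}
*/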
3054 
3055 /* Not scalable because of ISAllGather() unless getting all columns. */
3056 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3057 {
3058   PetscErrorCode ierr;
3059   IS             iscol_local;
3060   PetscBool      isstride;
3061   PetscMPIInt    lisstride=0,gisstride;
3062 
3063   PetscFunctionBegin;
3064   /* check if we are grabbing all columns */
3065   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3066 
3067   if (isstride) {
3068     PetscInt  start,len,mstart,mlen;
3069     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3070     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3071     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3072     if (mstart == start && mlen-mstart == len) lisstride = 1;
3073   }
3074 
3075   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3076   if (gisstride) {
3077     PetscInt N;
3078     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3079     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3080     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3081     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3082   } else {
3083     PetscInt cbs;
3084     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3085     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3086     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3087   }
3088 
3089   *isseq = iscol_local;
3090   PetscFunctionReturn(0);
3091 }
3092 
3093 /*
3094  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3095  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3096 
3097  Input Parameters:
3098    mat - matrix
3099    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3100            i.e., mat->rstart <= isrow[i] < mat->rend
3101    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3102            i.e., mat->cstart <= iscol[i] < mat->cend
3103  Output Parameters:
3104    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3105    iscol_o - sequential column index set for retrieving mat->B
3106    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
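
 A small worked example (an illustration, not from an actual run): suppose this process's
 off-diagonal block B has ghost columns {2,9} in global numbering and the global iscol
 contains 9 but not 2. Then iscol_o = {1}, the local index of column 9 within B, and
 garray[0] is the position of 9 within the global iscol.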
3107  */
3108 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3109 {
3110   PetscErrorCode ierr;
3111   Vec            x,cmap;
3112   const PetscInt *is_idx;
3113   PetscScalar    *xarray,*cmaparray;
3114   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3115   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3116   Mat            B=a->B;
3117   Vec            lvec=a->lvec,lcmap;
3118   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3119   MPI_Comm       comm;
3120   VecScatter     Mvctx=a->Mvctx;
3121 
3122   PetscFunctionBegin;
3123   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3124   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3125 
3126   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3127   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3128   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3129   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3130   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3131 
3132   /* Get start indices */
3133   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3134   isstart -= ncols;
3135   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3136 
3137   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3138   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3139   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3140   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3141   for (i=0; i<ncols; i++) {
3142     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3143     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3144     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3145   }
3146   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3147   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3148   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3149 
3150   /* Get iscol_d */
3151   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3152   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3153   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3154 
3155   /* Get isrow_d */
3156   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3157   rstart = mat->rmap->rstart;
3158   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3159   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3160   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3161   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3162 
3163   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3164   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3165   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3166 
3167   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3168   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3169   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3170 
3171   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3172 
3173   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3174   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3175 
3176   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3177   /* off-process column indices */
3178   count = 0;
3179   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3180   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3181 
3182   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3183   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3184   for (i=0; i<Bn; i++) {
3185     if (PetscRealPart(xarray[i]) > -1.0) {
3186       idx[count]     = i;                   /* local column index in off-diagonal part B */
3187       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3188       count++;
3189     }
3190   }
3191   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3192   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3193 
3194   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3195   /* cannot ensure iscol_o has same blocksize as iscol! */
3196 
3197   ierr = PetscFree(idx);CHKERRQ(ierr);
3198   *garray = cmap1;
3199 
3200   ierr = VecDestroy(&x);CHKERRQ(ierr);
3201   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3202   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3203   PetscFunctionReturn(0);
3204 }
3205 
3206 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3207 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3208 {
3209   PetscErrorCode ierr;
3210   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3211   Mat            M = NULL;
3212   MPI_Comm       comm;
3213   IS             iscol_d,isrow_d,iscol_o;
3214   Mat            Asub = NULL,Bsub = NULL;
3215   PetscInt       n;
3216 
3217   PetscFunctionBegin;
3218   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3219 
3220   if (call == MAT_REUSE_MATRIX) {
3221     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3222     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3223     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3224 
3225     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3226     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3227 
3228     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3229     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3230 
3231     /* Update diagonal and off-diagonal portions of submat */
3232     asub = (Mat_MPIAIJ*)(*submat)->data;
3233     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3234     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3235     if (n) {
3236       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3237     }
3238     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3239     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3240 
3241   } else { /* call == MAT_INITIAL_MATRIX */
3242     const PetscInt *garray;
3243     PetscInt        BsubN;
3244 
3245     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3246     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3247 
3248     /* Create local submatrices Asub and Bsub */
3249     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3250     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3251 
3252     /* Create submatrix M */
3253     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3254 
3255     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3256     asub = (Mat_MPIAIJ*)M->data;
3257 
3258     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3259     n = asub->B->cmap->N;
3260     if (BsubN > n) {
3261       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3262       const PetscInt *idx;
3263       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3264       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3265 
3266       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3267       j = 0;
3268       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3269       for (i=0; i<n; i++) {
3270         if (j >= BsubN) break;
3271         while (subgarray[i] > garray[j]) j++;
3272 
3273         if (subgarray[i] == garray[j]) {
3274           idx_new[i] = idx[j++];
3275         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3276       }
3277       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3278 
3279       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3280       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3281 
3282     } else if (BsubN < n) {
3283       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3284     }
3285 
3286     ierr = PetscFree(garray);CHKERRQ(ierr);
3287     *submat = M;
3288 
3289     /* Save isrow_d, iscol_d and iscol_o, used on this process for the next request */
3290     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3291     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3292 
3293     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3294     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3295 
3296     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3297     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3298   }
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3303 {
3304   PetscErrorCode ierr;
3305   IS             iscol_local=NULL,isrow_d;
3306   PetscInt       csize;
3307   PetscInt       n,i,j,start,end;
3308   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3309   MPI_Comm       comm;
3310 
3311   PetscFunctionBegin;
3312   /* If isrow has same processor distribution as mat,
3313      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3314   if (call == MAT_REUSE_MATRIX) {
3315     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3316     if (isrow_d) {
3317       sameRowDist  = PETSC_TRUE;
3318       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3319     } else {
3320       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3321       if (iscol_local) {
3322         sameRowDist  = PETSC_TRUE;
3323         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3324       }
3325     }
3326   } else {
3327     /* Check if isrow has same processor distribution as mat */
3328     sameDist[0] = PETSC_FALSE;
3329     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3330     if (!n) {
3331       sameDist[0] = PETSC_TRUE;
3332     } else {
3333       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3334       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3335       if (i >= start && j < end) {
3336         sameDist[0] = PETSC_TRUE;
3337       }
3338     }
3339 
3340     /* Check if iscol has same processor distribution as mat */
3341     sameDist[1] = PETSC_FALSE;
3342     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3343     if (!n) {
3344       sameDist[1] = PETSC_TRUE;
3345     } else {
3346       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3347       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3348       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3349     }
3350 
3351     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3352     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3353     sameRowDist = tsameDist[0];
3354   }
3355 
3356   if (sameRowDist) {
3357     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3358       /* isrow and iscol have same processor distribution as mat */
3359       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3360       PetscFunctionReturn(0);
3361     } else { /* sameRowDist */
3362       /* isrow has same processor distribution as mat */
3363       if (call == MAT_INITIAL_MATRIX) {
3364         PetscBool sorted;
3365         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3366         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3367         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3368         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3369 
3370         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3371         if (sorted) {
3372           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3373           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3374           PetscFunctionReturn(0);
3375         }
3376       } else { /* call == MAT_REUSE_MATRIX */
3377         IS iscol_sub;
3378         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3379         if (iscol_sub) {
3380           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3381           PetscFunctionReturn(0);
3382         }
3383       }
3384     }
3385   }
3386 
3387   /* General case: iscol -> iscol_local which has global size of iscol */
3388   if (call == MAT_REUSE_MATRIX) {
3389     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3390     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3391   } else {
3392     if (!iscol_local) {
3393       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3394     }
3395   }
3396 
3397   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3398   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3399 
3400   if (call == MAT_INITIAL_MATRIX) {
3401     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3402     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3403   }
3404   PetscFunctionReturn(0);
3405 }
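
/*
   A sketch of reaching the dispatcher above through the public API (isrow and iscol are
   assumed to be parallel ISes distributed like mat, so the SameRowColDist branch is taken;
   the reuse call retrieves the index sets stashed on Asub during the first call):

     Mat Asub;
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&Asub);CHKERRQ(ierr);
*/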
3406 
3407 /*@C
3408      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3409          and "off-diagonal" part of the matrix in CSR format.
3410 
3411    Collective
3412 
3413    Input Parameters:
3414 +  comm - MPI communicator
3415 .  A - "diagonal" portion of matrix
3416 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3417 -  garray - global index of B columns
3418 
3419    Output Parameter:
3420 .   mat - the matrix, with input A as its local diagonal matrix

3421    Level: advanced
3422 
3423    Notes:
3424        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3425        A becomes part of the output mat, and B is destroyed by this routine. The user cannot use A or B afterwards.
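
   A sketch of typical usage (A, B, and garray prepared by the caller as described above):
.vb
   Mat C;
   ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);
.ve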
3426 
3427 .seealso: MatCreateMPIAIJWithSplitArrays()
3428 @*/
3429 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3430 {
3431   PetscErrorCode    ierr;
3432   Mat_MPIAIJ        *maij;
3433   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3434   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3435   const PetscScalar *oa;
3436   Mat               Bnew;
3437   PetscInt          m,n,N;
3438 
3439   PetscFunctionBegin;
3440   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3441   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3442   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3443   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3444   /* The check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3445   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3446 
3447   /* Get global columns of mat */
3448   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3449 
3450   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3451   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3452   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3453   maij = (Mat_MPIAIJ*)(*mat)->data;
3454 
3455   (*mat)->preallocated = PETSC_TRUE;
3456 
3457   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3458   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3459 
3460   /* Set A as diagonal portion of *mat */
3461   maij->A = A;
3462 
3463   nz = oi[m];
3464   for (i=0; i<nz; i++) {
3465     col   = oj[i];
3466     oj[i] = garray[col];
3467   }
3468 
3469   /* Set Bnew as off-diagonal portion of *mat */
3470   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3471   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3472   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3473   bnew        = (Mat_SeqAIJ*)Bnew->data;
3474   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3475   maij->B     = Bnew;
3476 
3477   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3478 
3479   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3480   b->free_a       = PETSC_FALSE;
3481   b->free_ij      = PETSC_FALSE;
3482   ierr = MatDestroy(&B);CHKERRQ(ierr);
3483 
3484   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3485   bnew->free_a       = PETSC_TRUE;
3486   bnew->free_ij      = PETSC_TRUE;
3487 
3488   /* condense columns of maij->B */
3489   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3490   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3491   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3492   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3493   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3494   PetscFunctionReturn(0);
3495 }
3496 
3497 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3498 
3499 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3500 {
3501   PetscErrorCode ierr;
3502   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3503   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3504   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3505   Mat            M,Msub,B=a->B;
3506   MatScalar      *aa;
3507   Mat_SeqAIJ     *aij;
3508   PetscInt       *garray = a->garray,*colsub,Ncols;
3509   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3510   IS             iscol_sub,iscmap;
3511   const PetscInt *is_idx,*cmap;
3512   PetscBool      allcolumns=PETSC_FALSE;
3513   MPI_Comm       comm;
3514 
3515   PetscFunctionBegin;
3516   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3517   if (call == MAT_REUSE_MATRIX) {
3518     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3519     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3520     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3521 
3522     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3523     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3524 
3525     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3526     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3527 
3528     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3529 
3530   } else { /* call == MAT_INITIAL_MATRIX */
3531     PetscBool flg;
3532 
3533     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3534     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3535 
3536     /* (1) iscol -> nonscalable iscol_local */
3537     /* Check for special case: each processor gets entire matrix columns */
3538     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3539     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3540     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3541     if (allcolumns) {
3542       iscol_sub = iscol_local;
3543       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3544       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3545 
3546     } else {
3547       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3548       PetscInt *idx,*cmap1,k;
3549       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3550       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3551       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3552       count = 0;
3553       k     = 0;
3554       for (i=0; i<Ncols; i++) {
3555         j = is_idx[i];
3556         if (j >= cstart && j < cend) {
3557           /* diagonal part of mat */
3558           idx[count]     = j;
3559           cmap1[count++] = i; /* column index in submat */
3560         } else if (Bn) {
3561           /* off-diagonal part of mat */
3562           if (j == garray[k]) {
3563             idx[count]     = j;
3564             cmap1[count++] = i;  /* column index in submat */
3565           } else if (j > garray[k]) {
3566             while (j > garray[k] && k < Bn-1) k++;
3567             if (j == garray[k]) {
3568               idx[count]     = j;
3569               cmap1[count++] = i; /* column index in submat */
3570             }
3571           }
3572         }
3573       }
3574       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3575 
3576       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3577       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3578       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3579 
3580       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3581     }
3582 
3583     /* (3) Create sequential Msub */
3584     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3585   }
3586 
3587   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3588   aij  = (Mat_SeqAIJ*)(Msub)->data;
3589   ii   = aij->i;
3590   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3591 
3592   /*
3593       m - number of local rows
3594       Ncols - number of columns (same on all processors)
3595       rstart - first row in new global matrix generated
3596   */
3597   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3598 
3599   if (call == MAT_INITIAL_MATRIX) {
3600     /* (4) Create parallel newmat */
3601     PetscMPIInt    rank,size;
3602     PetscInt       csize;
3603 
3604     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3605     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3606 
3607     /*
3608         Determine the number of non-zeros in the diagonal and off-diagonal
3609         portions of the matrix in order to do correct preallocation
3610     */
3611 
3612     /* first get start and end of "diagonal" columns */
3613     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3614     if (csize == PETSC_DECIDE) {
3615       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3616       if (mglobal == Ncols) { /* square matrix */
3617         nlocal = m;
3618       } else {
3619         nlocal = Ncols/size + ((Ncols % size) > rank);
3620       }
3621     } else {
3622       nlocal = csize;
3623     }
3624     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3625     rstart = rend - nlocal;
3626     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3627 
3628     /* next, compute all the lengths */
3629     jj    = aij->j;
3630     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3631     olens = dlens + m;
3632     for (i=0; i<m; i++) {
3633       jend = ii[i+1] - ii[i];
3634       olen = 0;
3635       dlen = 0;
3636       for (j=0; j<jend; j++) {
3637         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3638         else dlen++;
3639         jj++;
3640       }
3641       olens[i] = olen;
3642       dlens[i] = dlen;
3643     }
3644 
3645     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3646     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3647 
3648     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3649     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3650     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3651     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3652     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3653     ierr = PetscFree(dlens);CHKERRQ(ierr);
3654 
3655   } else { /* call == MAT_REUSE_MATRIX */
3656     M    = *newmat;
3657     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3658     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3659     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3660     /*
3661          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3662        rather than the slower MatSetValues().
3663     */
3664     M->was_assembled = PETSC_TRUE;
3665     M->assembled     = PETSC_FALSE;
3666   }
3667 
3668   /* (5) Set values of Msub to *newmat */
3669   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3670   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3671 
3672   jj   = aij->j;
3673   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3674   for (i=0; i<m; i++) {
3675     row = rstart + i;
3676     nz  = ii[i+1] - ii[i];
3677     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3678     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3679     jj += nz; aa += nz;
3680   }
3681   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3682   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3683 
3684   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3685   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3686 
3687   ierr = PetscFree(colsub);CHKERRQ(ierr);
3688 
3689   /* save Msub, iscol_sub and iscmap, used on this process for the next request */
3690   if (call == MAT_INITIAL_MATRIX) {
3691     *newmat = M;
3692     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3693     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3694 
3695     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3696     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3697 
3698     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3699     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3700 
3701     if (iscol_local) {
3702       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3703       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3704     }
3705   }
3706   PetscFunctionReturn(0);
3707 }
3708 
3709 /*
3710     Not great since it makes two copies of the submatrix: first a SeqAIJ
3711   on each process, and then the end result by concatenating the local matrices.
3712   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3713 
3714   Note: This requires a sequential iscol with all indices.
3715 */
3716 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3717 {
3718   PetscErrorCode ierr;
3719   PetscMPIInt    rank,size;
3720   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3721   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3722   Mat            M,Mreuse;
3723   MatScalar      *aa,*vwork;
3724   MPI_Comm       comm;
3725   Mat_SeqAIJ     *aij;
3726   PetscBool      colflag,allcolumns=PETSC_FALSE;
3727 
3728   PetscFunctionBegin;
3729   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3730   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3731   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3732 
3733   /* Check for special case: each processor gets entire matrix columns */
3734   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3735   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3736   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3737   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3738 
3739   if (call ==  MAT_REUSE_MATRIX) {
3740     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3741     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3742     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3743   } else {
3744     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3745   }
3746 
3747   /*
3748       m - number of local rows
3749       n - number of columns (same on all processors)
3750       rstart - first row in new global matrix generated
3751   */
3752   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3753   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3754   if (call == MAT_INITIAL_MATRIX) {
3755     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3756     ii  = aij->i;
3757     jj  = aij->j;
3758 
3759     /*
3760         Determine the number of non-zeros in the diagonal and off-diagonal
3761         portions of the matrix in order to do correct preallocation
3762     */
3763 
3764     /* first get start and end of "diagonal" columns */
3765     if (csize == PETSC_DECIDE) {
3766       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3767       if (mglobal == n) { /* square matrix */
3768         nlocal = m;
3769       } else {
3770         nlocal = n/size + ((n % size) > rank);
3771       }
3772     } else {
3773       nlocal = csize;
3774     }
3775     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3776     rstart = rend - nlocal;
3777     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3778 
3779     /* next, compute all the lengths */
3780     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3781     olens = dlens + m;
3782     for (i=0; i<m; i++) {
3783       jend = ii[i+1] - ii[i];
3784       olen = 0;
3785       dlen = 0;
3786       for (j=0; j<jend; j++) {
3787         if (*jj < rstart || *jj >= rend) olen++;
3788         else dlen++;
3789         jj++;
3790       }
3791       olens[i] = olen;
3792       dlens[i] = dlen;
3793     }
3794     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3795     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3796     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3797     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3798     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3799     ierr = PetscFree(dlens);CHKERRQ(ierr);
3800   } else {
3801     PetscInt ml,nl;
3802 
3803     M    = *newmat;
3804     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3805     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3806     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3807     /*
3808          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3809        rather than the slower MatSetValues().
3810     */
3811     M->was_assembled = PETSC_TRUE;
3812     M->assembled     = PETSC_FALSE;
3813   }
3814   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3815   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3816   ii   = aij->i;
3817   jj   = aij->j;
3818 
3819   /* trigger copy to CPU if needed */
3820   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3821   for (i=0; i<m; i++) {
3822     row   = rstart + i;
3823     nz    = ii[i+1] - ii[i];
3824     cwork = jj; jj += nz;
3825     vwork = aa; aa += nz;
3826     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3827   }
3828   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3829 
3830   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832   *newmat = M;
3833 
3834   /* save submatrix used in processor for next request */
3835   if (call ==  MAT_INITIAL_MATRIX) {
3836     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3837     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3838   }
3839   PetscFunctionReturn(0);
3840 }
3841 
3842 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3843 {
3844   PetscInt       m,cstart, cend,j,nnz,i,d;
3845   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3846   const PetscInt *JJ;
3847   PetscErrorCode ierr;
3848   PetscBool      nooffprocentries;
3849 
3850   PetscFunctionBegin;
3851   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3852 
3853   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3854   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3855   m      = B->rmap->n;
3856   cstart = B->cmap->rstart;
3857   cend   = B->cmap->rend;
3858   rstart = B->rmap->rstart;
3859 
3860   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3861 
3862   if (PetscDefined(USE_DEBUG)) {
3863     for (i=0; i<m; i++) {
3864       nnz = Ii[i+1]- Ii[i];
3865       JJ  = J + Ii[i];
3866       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3867       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3868       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3869     }
3870   }
3871 
3872   for (i=0; i<m; i++) {
3873     nnz     = Ii[i+1]- Ii[i];
3874     JJ      = J + Ii[i];
3875     nnz_max = PetscMax(nnz_max,nnz);
3876     d       = 0;
3877     for (j=0; j<nnz; j++) {
3878       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3879     }
3880     d_nnz[i] = d;
3881     o_nnz[i] = nnz - d;
3882   }
3883   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3884   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3885 
3886   for (i=0; i<m; i++) {
3887     ii   = i + rstart;
3888     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3889   }
3890   nooffprocentries    = B->nooffprocentries;
3891   B->nooffprocentries = PETSC_TRUE;
3892   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3894   B->nooffprocentries = nooffprocentries;
3895 
3896   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3897   PetscFunctionReturn(0);
3898 }
3899 
3900 /*@
3901    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3902    (the default parallel PETSc format).
3903 
3904    Collective
3905 
3906    Input Parameters:
3907 +  B - the matrix
3908 .  i - the indices into j for the start of each local row (starts with zero)
3909 .  j - the column indices for each local row (starts with zero)
3910 -  v - optional values in the matrix
3911 
3912    Level: developer
3913 
3914    Notes:
3915        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3916      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3917      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3918 
3919        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3920 
3921        The format used for the sparse matrix input is equivalent to a row-major
3922     ordering, i.e., for the following matrix the expected input data is as shown below:
3924 
3925 $        1 0 0
3926 $        2 0 3     P0
3927 $       -------
3928 $        4 5 6     P1
3929 $
3930 $     Process0 [P0]: rows_owned=[0,1]
3931 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3932 $        j =  {0,0,2}  [size = 3]
3933 $        v =  {1,2,3}  [size = 3]
3934 $
3935 $     Process1 [P1]: rows_owned=[2]
3936 $        i =  {0,3}    [size = nrow+1  = 1+1]
3937 $        j =  {0,1,2}  [size = 3]
3938 $        v =  {4,5,6}  [size = 3]
3939 
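     As a minimal calling sketch for the example above (each rank passes its own
     i, j, v; the values here are illustrative and error checking is omitted):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};   /* P0 data from the example above */
     PetscScalar v[] = {1,2,3};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);          /* 2 local rows on P0, 1 on P1 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
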
3940 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3941           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3942 @*/
3943 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3944 {
3945   PetscErrorCode ierr;
3946 
3947   PetscFunctionBegin;
3948   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3949   PetscFunctionReturn(0);
3950 }
3951 
3952 /*@C
3953    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3954    (the default parallel PETSc format).  For good matrix assembly performance
3955    the user should preallocate the matrix storage by setting the parameters
3956    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3957    performance can be increased by more than a factor of 50.
3958 
3959    Collective
3960 
3961    Input Parameters:
3962 +  B - the matrix
3963 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3964            (same value is used for all local rows)
3965 .  d_nnz - array containing the number of nonzeros in the various rows of the
3966            DIAGONAL portion of the local submatrix (possibly different for each row)
3967            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3968            The size of this array is equal to the number of local rows, i.e., 'm'.
3969            For matrices that will be factored, you must leave room for (and set)
3970            the diagonal entry even if it is zero.
3971 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3972            submatrix (same value is used for all local rows).
3973 -  o_nnz - array containing the number of nonzeros in the various rows of the
3974            OFF-DIAGONAL portion of the local submatrix (possibly different for
3975            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3976            structure. The size of this array is equal to the number
3977            of local rows, i.e., 'm'.
3978 
3979    If the *_nnz parameter is given then the *_nz parameter is ignored
3980 
3981    The AIJ format (also called the Yale sparse matrix format or
3982    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3983    storage.  The stored row and column indices begin with zero.
3984    See Users-Manual: ch_mat for details.
3985 
3986    The parallel matrix is partitioned such that the first m0 rows belong to
3987    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3988    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3989 
3990    The DIAGONAL portion of the local submatrix of a processor can be defined
3991    as the submatrix obtained by extracting the part corresponding to
3992    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3993    first row that belongs to the processor, r2 is the last row belonging to
3994    this processor, and c1-c2 is the range of indices of the local part of a
3995    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3996    common case of a square matrix, the row and column ranges are the same and
3997    the DIAGONAL part is also square. The remaining portion of the local
3998    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4001 
4002    You can call MatGetInfo() to get information on how effective the preallocation was;
4003    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4004    You can also run with the option -info and look for messages with the string
4005    malloc in them to see if additional memory allocation was needed.
4006 
4007    Example usage:
4008 
4009    Consider the following 8x8 matrix with 34 non-zero values, that is
4010    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4011    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4012    as follows:
4013 
4014 .vb
4015             1  2  0  |  0  3  0  |  0  4
4016     Proc0   0  5  6  |  7  0  0  |  8  0
4017             9  0 10  | 11  0  0  | 12  0
4018     -------------------------------------
4019            13  0 14  | 15 16 17  |  0  0
4020     Proc1   0 18  0  | 19 20 21  |  0  0
4021             0  0  0  | 22 23  0  | 24  0
4022     -------------------------------------
4023     Proc2  25 26 27  |  0  0 28  | 29  0
4024            30  0  0  | 31 32 33  |  0 34
4025 .ve
4026 
4027    This can be represented as a collection of submatrices as:
4028 
4029 .vb
4030       A B C
4031       D E F
4032       G H I
4033 .ve
4034 
4035    Where the submatrices A,B,C are owned by proc0, D,E,F are
4036    owned by proc1, G,H,I are owned by proc2.
4037 
4038    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4039    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4040    The 'M','N' parameters are 8,8, and have the same values on all procs.
4041 
4042    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4043    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4044    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4045    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4046    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4047    matrix, and [DF] as another SeqAIJ matrix.
4048 
4049    When d_nz, o_nz parameters are specified, d_nz storage elements are
4050    allocated for every row of the local diagonal submatrix, and o_nz
4051    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4052    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4053    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4054    In this case, the values of d_nz,o_nz are:
4055 .vb
4056      proc0 : dnz = 2, o_nz = 2
4057      proc1 : dnz = 3, o_nz = 2
4058      proc2 : dnz = 1, o_nz = 4
4059 .ve
4060    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4061    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4062    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4063    34 values.
4064 
4065    When d_nnz, o_nnz parameters are specified, the storage is specified
4066    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4067    In the above case the values for d_nnz,o_nnz are:
4068 .vb
4069      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4070      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4071      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4072 .ve
4073    Here the space allocated is the sum of all the above values, i.e., 34, and
4074    hence the preallocation is perfect.
4075 
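   As a sketch, the collective call on proc1 of the example above would be (the
   other ranks make the same call with their own counts; error checking omitted):

.vb
     Mat      B;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,3,3,8,8);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
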
4076    Level: intermediate
4077 
4078 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4079           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4080 @*/
4081 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4082 {
4083   PetscErrorCode ierr;
4084 
4085   PetscFunctionBegin;
4086   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4087   PetscValidType(B,1);
4088   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4089   PetscFunctionReturn(0);
4090 }
4091 
4092 /*@
4093      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4094          in standard CSR format.
4095 
4096    Collective
4097 
4098    Input Parameters:
4099 +  comm - MPI communicator
4100 .  m - number of local rows (Cannot be PETSC_DECIDE)
4101 .  n - This value should be the same as the local size used in creating the
4102        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4103        calculated if N is given). For square matrices n is almost always m.
4104 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4105 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4106 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4107 .   j - column indices
4108 -   a - matrix values
4109 
4110    Output Parameter:
4111 .   mat - the matrix
4112 
4113    Level: intermediate
4114 
4115    Notes:
4116        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4117      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4118      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4119 
4120        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4121 
4122        Once you have created the matrix you can update it with new numerical values using
4123     MatUpdateMPIAIJWithArrays().
4124 
4125        The format used for the sparse matrix input is equivalent to a row-major
4126     ordering, i.e., for the following matrix the expected input data is as shown below:
4127 
4128 $        1 0 0
4129 $        2 0 3     P0
4130 $       -------
4131 $        4 5 6     P1
4132 $
4133 $     Process0 [P0]: rows_owned=[0,1]
4134 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4135 $        j =  {0,0,2}  [size = 3]
4136 $        v =  {1,2,3}  [size = 3]
4137 $
4138 $     Process1 [P1]: rows_owned=[2]
4139 $        i =  {0,3}    [size = nrow+1  = 1+1]
4140 $        j =  {0,1,2}  [size = 3]
4141 $        v =  {4,5,6}  [size = 3]
4142 
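     As a minimal sketch for P0 of the example above (P1 makes the matching
     collective call with its own arrays; error checking omitted):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve
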
4143 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4144           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4145 @*/
4146 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4147 {
4148   PetscErrorCode ierr;
4149 
4150   PetscFunctionBegin;
4151   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4152   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4153   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4154   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4155   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4156   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4157   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4158   PetscFunctionReturn(0);
4159 }
4160 
4161 /*@
4162      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4163          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4164 
4165    Collective
4166 
4167    Input Parameters:
4168 +  mat - the matrix
4169 .  m - number of local rows (Cannot be PETSC_DECIDE)
4170 .  n - This value should be the same as the local size used in creating the
4171        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4172        calculated if N is given). For square matrices n is almost always m.
4173 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4174 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4175 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4176 .  J - column indices
4177 -  v - matrix values
4178 
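   Notes:
     As a minimal update sketch (assumes a previously created matrix A and illustrative
     arrays i, j, vnew; i and j must be identical to those used to create A):

.vb
     MatUpdateMPIAIJWithArrays(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,vnew);
.ve
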
4179    Level: intermediate
4180 
4181 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4182           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4183 @*/
4184 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4185 {
4186   PetscErrorCode ierr;
4187   PetscInt       cstart,nnz,i,j;
4188   PetscInt       *ld;
4189   PetscBool      nooffprocentries;
4190   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4191   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4192   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4193   const PetscInt *Adi = Ad->i;
4194   PetscInt       ldi,Iii,md;
4195 
4196   PetscFunctionBegin;
4197   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4198   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4199   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change between calls to MatUpdateMPIAIJWithArrays()");
4200   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change between calls to MatUpdateMPIAIJWithArrays()");
4201 
4202   cstart = mat->cmap->rstart;
4203   if (!Aij->ld) {
4204     /* count number of entries below block diagonal */
4205     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4206     Aij->ld = ld;
4207     for (i=0; i<m; i++) {
4208       nnz   = Ii[i+1] - Ii[i];
4209       j     = 0;
4210       while (j < nnz && J[j] < cstart) {j++;} /* count the entries of row i left of the diagonal block */
4211       J    += nnz;
4212       ld[i] = j;
4213     }
4214   } else {
4215     ld = Aij->ld;
4216   }
4217 
4218   for (i=0; i<m; i++) {
4219     nnz  = Ii[i+1]- Ii[i];
4220     Iii  = Ii[i];
4221     ldi  = ld[i];
4222     md   = Adi[i+1]-Adi[i];
4223     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4224     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4225     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4226     ad  += md;
4227     ao  += nnz - md;
4228   }
4229   nooffprocentries      = mat->nooffprocentries;
4230   mat->nooffprocentries = PETSC_TRUE;
4231   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4232   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4233   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4234   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4235   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4236   mat->nooffprocentries = nooffprocentries;
4237   PetscFunctionReturn(0);
4238 }
4239 
4240 /*@C
4241    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4242    (the default parallel PETSc format).  For good matrix assembly performance
4243    the user should preallocate the matrix storage by setting the parameters
4244    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4245    performance can be increased by more than a factor of 50.
4246 
4247    Collective
4248 
4249    Input Parameters:
4250 +  comm - MPI communicator
4251 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4252            This value should be the same as the local size used in creating the
4253            y vector for the matrix-vector product y = Ax.
4254 .  n - This value should be the same as the local size used in creating the
4255        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4256        calculated if N is given). For square matrices n is almost always m.
4257 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4258 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4259 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4260            (same value is used for all local rows)
4261 .  d_nnz - array containing the number of nonzeros in the various rows of the
4262            DIAGONAL portion of the local submatrix (possibly different for each row)
4263            or NULL, if d_nz is used to specify the nonzero structure.
4264            The size of this array is equal to the number of local rows, i.e., 'm'.
4265 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4266            submatrix (same value is used for all local rows).
4267 -  o_nnz - array containing the number of nonzeros in the various rows of the
4268            OFF-DIAGONAL portion of the local submatrix (possibly different for
4269            each row) or NULL, if o_nz is used to specify the nonzero
4270            structure. The size of this array is equal to the number
4271            of local rows, i.e., 'm'.
4272 
4273    Output Parameter:
4274 .  A - the matrix
4275 
4276    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4277    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4278    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4279 
4280    Notes:
4281    If the *_nnz parameter is given then the *_nz parameter is ignored
4282 
4283    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4284    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4285    storage requirements for this matrix.
4286 
4287    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4288    processor then it must be used on all processors that share the object for
4289    that argument.
4290 
4291    The user MUST specify either the local or global matrix dimensions
4292    (possibly both).
4293 
4294    The parallel matrix is partitioned across processors such that the
4295    first m0 rows belong to process 0, the next m1 rows belong to
4296    process 1, the next m2 rows belong to process 2 etc.. where
4297    process 1, the next m2 rows belong to process 2 etc., where
4298    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4299    values corresponding to an [m x N] submatrix.
4300    The columns are logically partitioned with the n0 columns belonging
4301    to 0th partition, the next n1 columns belonging to the next
4302    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4303 
4304    The DIAGONAL portion of the local submatrix on any given processor
4305    is the submatrix corresponding to the rows and columns m,n
4306    corresponding to the given processor, i.e., the diagonal matrix on
4307    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4308    etc. The remaining portion of the local submatrix [m x (N-n)]
4309    constitutes the OFF-DIAGONAL portion. The example below better
4310    illustrates this concept.
4311 
4312    For a square global matrix we define each processor's diagonal portion
4313    to be its local rows and the corresponding columns (a square submatrix);
4314    each processor's off-diagonal portion encompasses the remainder of the
4315    local matrix (a rectangular submatrix).
4318 
4319    When calling this routine with a single process communicator, a matrix of
4320    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4321    type of communicator, use the construction mechanism
4322 .vb
4323      MatCreate(...,&A);
4324      MatSetType(A,MATMPIAIJ);
4325      MatSetSizes(A, m,n,M,N);
4326      MatMPIAIJSetPreallocation(A,...);
4327 .ve
4330 
4331    By default, this format uses inodes (identical nodes) when possible.
4332    We search for consecutive rows with the same nonzero structure, thereby
4333    reusing matrix information to achieve increased efficiency.
4334 
4335    Options Database Keys:
4336 +  -mat_no_inode  - Do not use inodes
4337 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4338 
4341    Example usage:
4342 
4343    Consider the following 8x8 matrix with 34 non-zero values, that is
4344    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4345    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4346    as follows
4347 
4348 .vb
4349             1  2  0  |  0  3  0  |  0  4
4350     Proc0   0  5  6  |  7  0  0  |  8  0
4351             9  0 10  | 11  0  0  | 12  0
4352     -------------------------------------
4353            13  0 14  | 15 16 17  |  0  0
4354     Proc1   0 18  0  | 19 20 21  |  0  0
4355             0  0  0  | 22 23  0  | 24  0
4356     -------------------------------------
4357     Proc2  25 26 27  |  0  0 28  | 29  0
4358            30  0  0  | 31 32 33  |  0 34
4359 .ve
4360 
4361    This can be represented as a collection of submatrices as
4362 
4363 .vb
4364       A B C
4365       D E F
4366       G H I
4367 .ve
4368 
4369    Where the submatrices A,B,C are owned by proc0, D,E,F are
4370    owned by proc1, G,H,I are owned by proc2.
4371 
4372    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4373    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4374    The 'M','N' parameters are 8,8, and have the same values on all procs.
4375 
4376    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4377    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4378    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4379    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4380    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4381    matrix, and [DF] as another SeqAIJ matrix.
4382 
4383    When d_nz, o_nz parameters are specified, d_nz storage elements are
4384    allocated for every row of the local diagonal submatrix, and o_nz
4385    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4386    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4387    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4388    In this case, the values of d_nz,o_nz are
4389 .vb
4390      proc0 : dnz = 2, o_nz = 2
4391      proc1 : dnz = 3, o_nz = 2
4392      proc2 : dnz = 1, o_nz = 4
4393 .ve
4394    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4395    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4396    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4397    34 values.
4398 
4399    When d_nnz, o_nnz parameters are specified, the storage is specified
4400    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4401    In the above case the values for d_nnz,o_nnz are
4402 .vb
4403      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4404      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4405      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4406 .ve
4407    Here the space allocated is the sum of all the above values, i.e., 34, and
4408    hence the preallocation is perfect.
4409 
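   As a sketch, proc0 of the example above could create the matrix with per-row
   counts as follows (each rank passes its own local sizes and counts; error
   checking omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
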
4410    Level: intermediate
4411 
4412 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4413           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4414 @*/
4415 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4416 {
4417   PetscErrorCode ierr;
4418   PetscMPIInt    size;
4419 
4420   PetscFunctionBegin;
4421   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4422   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4423   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4424   if (size > 1) {
4425     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4426     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4427   } else {
4428     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4429     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4430   }
4431   PetscFunctionReturn(0);
4432 }
4433 
4434 /*@C
4435   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4436 
4437   Not collective
4438 
4439   Input Parameter:
4440 . A - The MPIAIJ matrix
4441 
4442   Output Parameters:
4443 + Ad - The local diagonal block as a SeqAIJ matrix
4444 . Ao - The local off-diagonal block as a SeqAIJ matrix
4445 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4446 
4447   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4448   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4449   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4450   local column numbers to global column numbers in the original matrix.
4451 
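  As a minimal sketch (A assumed to be an assembled MATMPIAIJ matrix):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* local column c of Ao corresponds to global column colmap[c] of A */
.ve
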
4452   Level: intermediate
4453 
4454 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4455 @*/
4456 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4457 {
4458   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4459   PetscBool      flg;
4460   PetscErrorCode ierr;
4461 
4462   PetscFunctionBegin;
4463   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4464   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4465   if (Ad)     *Ad     = a->A;
4466   if (Ao)     *Ao     = a->B;
4467   if (colmap) *colmap = a->garray;
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4472 {
4473   PetscErrorCode ierr;
4474   PetscInt       m,N,i,rstart,nnz,Ii;
4475   PetscInt       *indx;
4476   PetscScalar    *values;
4477 
4478   PetscFunctionBegin;
4479   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4480   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4481     PetscInt       *dnz,*onz,sum,bs,cbs;
4482 
4483     if (n == PETSC_DECIDE) {
4484       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4485     }
4486     /* Check sum(n) = N */
4487     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4488     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4489 
4490     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4491     rstart -= m;
4492 
4493     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4494     for (i=0; i<m; i++) {
4495       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4496       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4497       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4498     }
4499 
4500     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4501     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4502     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4503     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4504     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4505     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4506     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4507     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4508     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4509   }
4510 
4511   /* numeric phase */
4512   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4513   for (i=0; i<m; i++) {
4514     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4515     Ii   = i + rstart;
4516     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4517     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4518   }
4519   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4520   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4521   PetscFunctionReturn(0);
4522 }
4523 
4524 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4525 {
4526   PetscErrorCode    ierr;
4527   PetscMPIInt       rank;
4528   PetscInt          m,N,i,rstart,nnz;
4529   size_t            len;
4530   const PetscInt    *indx;
4531   PetscViewer       out;
4532   char              *name;
4533   Mat               B;
4534   const PetscScalar *values;
4535 
4536   PetscFunctionBegin;
4537   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4538   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4539   /* Should this be the type of the diagonal block of A? */
4540   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4541   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4542   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4543   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4544   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4545   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4546   for (i=0; i<m; i++) {
4547     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4548     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4549     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4550   }
4551   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4552   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4553 
4554   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4555   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4556   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4557   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4558   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4559   ierr = PetscFree(name);CHKERRQ(ierr);
4560   ierr = MatView(B,out);CHKERRQ(ierr);
4561   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4562   ierr = MatDestroy(&B);CHKERRQ(ierr);
4563   PetscFunctionReturn(0);
4564 }
4565 
4566 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4567 {
4568   PetscErrorCode      ierr;
4569   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4570 
4571   PetscFunctionBegin;
4572   if (!merge) PetscFunctionReturn(0);
4573   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4574   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4575   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4576   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4577   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4578   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4579   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4580   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4581   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4582   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4583   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4584   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4585   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4586   ierr = PetscFree(merge);CHKERRQ(ierr);
4587   PetscFunctionReturn(0);
4588 }
4589 
4590 #include <../src/mat/utils/freespace.h>
4591 #include <petscbt.h>
4592 
4593 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4594 {
4595   PetscErrorCode      ierr;
4596   MPI_Comm            comm;
4597   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4598   PetscMPIInt         size,rank,taga,*len_s;
4599   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4600   PetscInt            proc,m;
4601   PetscInt            **buf_ri,**buf_rj;
4602   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4603   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4604   MPI_Request         *s_waits,*r_waits;
4605   MPI_Status          *status;
4606   MatScalar           *aa=a->a;
4607   MatScalar           **abuf_r,*ba_i;
4608   Mat_Merge_SeqsToMPI *merge;
4609   PetscContainer      container;
4610 
4611   PetscFunctionBegin;
4612   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4613   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4614 
4615   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4616   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4617 
4618   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4619   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4620   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4621 
4622   bi     = merge->bi;
4623   bj     = merge->bj;
4624   buf_ri = merge->buf_ri;
4625   buf_rj = merge->buf_rj;
4626 
4627   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4628   owners = merge->rowmap->range;
4629   len_s  = merge->len_s;
4630 
4631   /* send and recv matrix values */
4632   /*-----------------------------*/
4633   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4634   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4635 
4636   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4637   for (proc=0,k=0; proc<size; proc++) {
4638     if (!len_s[proc]) continue;
4639     i    = owners[proc];
4640     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4641     k++;
4642   }
4643 
4644   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4645   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4646   ierr = PetscFree(status);CHKERRQ(ierr);
4647 
4648   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4649   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4650 
4651   /* insert mat values of mpimat */
4652   /*----------------------------*/
4653   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4654   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4655 
4656   for (k=0; k<merge->nrecv; k++) {
4657     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4658     nrows       = *(buf_ri_k[k]);
4659     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4660     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4661   }
4662 
4663   /* set values of ba */
4664   m = merge->rowmap->n;
4665   for (i=0; i<m; i++) {
4666     arow = owners[rank] + i;
4667     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4668     bnzi = bi[i+1] - bi[i];
4669     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4670 
4671     /* add local non-zero vals of this proc's seqmat into ba */
4672     anzi   = ai[arow+1] - ai[arow];
4673     aj     = a->j + ai[arow];
4674     aa     = a->a + ai[arow];
4675     nextaj = 0;
4676     for (j=0; nextaj<anzi; j++) {
4677       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4678         ba_i[j] += aa[nextaj++];
4679       }
4680     }
4681 
4682     /* add received vals into ba */
4683     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4684       /* i-th row */
4685       if (i == *nextrow[k]) {
4686         anzi   = *(nextai[k]+1) - *nextai[k];
4687         aj     = buf_rj[k] + *(nextai[k]);
4688         aa     = abuf_r[k] + *(nextai[k]);
4689         nextaj = 0;
4690         for (j=0; nextaj<anzi; j++) {
4691           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4692             ba_i[j] += aa[nextaj++];
4693           }
4694         }
4695         nextrow[k]++; nextai[k]++;
4696       }
4697     }
4698     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4699   }
4700   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4701   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4702 
4703   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4704   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4705   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4706   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4707   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4708   PetscFunctionReturn(0);
4709 }
4710 
4711 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4712 {
4713   PetscErrorCode      ierr;
4714   Mat                 B_mpi;
4715   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4716   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4717   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4718   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4719   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4720   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4721   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4722   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4723   MPI_Status          *status;
4724   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4725   PetscBT             lnkbt;
4726   Mat_Merge_SeqsToMPI *merge;
4727   PetscContainer      container;
4728 
4729   PetscFunctionBegin;
4730   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4731 
4732   /* make sure it is a PETSc comm */
4733   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4734   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4735   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4736 
4737   ierr = PetscNew(&merge);CHKERRQ(ierr);
4738   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4739 
4740   /* determine row ownership */
4741   /*---------------------------------------------------------*/
4742   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4743   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4744   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4745   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4746   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4747   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4748   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4749 
4750   m      = merge->rowmap->n;
4751   owners = merge->rowmap->range;
4752 
4753   /* determine the number of messages to send, their lengths */
4754   /*---------------------------------------------------------*/
4755   len_s = merge->len_s;
4756 
4757   len          = 0; /* length of buf_si[] */
4758   merge->nsend = 0;
4759   for (proc=0; proc<size; proc++) {
4760     len_si[proc] = 0;
4761     if (proc == rank) {
4762       len_s[proc] = 0;
4763     } else {
4764       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4765       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4766     }
4767     if (len_s[proc]) {
4768       merge->nsend++;
4769       nrows = 0;
4770       for (i=owners[proc]; i<owners[proc+1]; i++) {
4771         if (ai[i+1] > ai[i]) nrows++;
4772       }
4773       len_si[proc] = 2*(nrows+1);
4774       len         += len_si[proc];
4775     }
4776   }
4777 
4778   /* determine the number and length of messages to receive for ij-structure */
4779   /*-------------------------------------------------------------------------*/
4780   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4781   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4782 
4783   /* post the Irecv of j-structure */
4784   /*-------------------------------*/
4785   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4786   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4787 
4788   /* post the Isend of j-structure */
4789   /*--------------------------------*/
4790   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4791 
4792   for (proc=0, k=0; proc<size; proc++) {
4793     if (!len_s[proc]) continue;
4794     i    = owners[proc];
4795     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4796     k++;
4797   }
4798 
4799   /* receives and sends of j-structure are complete */
4800   /*------------------------------------------------*/
4801   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4802   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4803 
4804   /* send and recv i-structure */
4805   /*---------------------------*/
4806   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4807   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4808 
4809   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4810   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4811   for (proc=0,k=0; proc<size; proc++) {
4812     if (!len_s[proc]) continue;
4813     /* form outgoing message for i-structure:
4814          buf_si[0]:                 nrows to be sent
4815                [1:nrows]:           row index (global)
4816                [nrows+1:2*nrows+1]: i-structure index
4817     */
4818     /*-------------------------------------------*/
4819     nrows       = len_si[proc]/2 - 1;
4820     buf_si_i    = buf_si + nrows+1;
4821     buf_si[0]   = nrows;
4822     buf_si_i[0] = 0;
4823     nrows       = 0;
4824     for (i=owners[proc]; i<owners[proc+1]; i++) {
4825       anzi = ai[i+1] - ai[i];
4826       if (anzi) {
4827         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4828         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4829         nrows++;
4830       }
4831     }
4832     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4833     k++;
4834     buf_si += len_si[proc];
4835   }
4836 
4837   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4838   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4839 
4840   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4841   for (i=0; i<merge->nrecv; i++) {
4842     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4843   }
4844 
4845   ierr = PetscFree(len_si);CHKERRQ(ierr);
4846   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4847   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4848   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4849   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4850   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4851   ierr = PetscFree(status);CHKERRQ(ierr);
4852 
4853   /* compute a local seq matrix in each processor */
4854   /*----------------------------------------------*/
4855   /* allocate bi array and free space for accumulating nonzero column info */
4856   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4857   bi[0] = 0;
4858 
4859   /* create and initialize a linked list */
4860   nlnk = N+1;
4861   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4862 
4863   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4864   len  = ai[owners[rank+1]] - ai[owners[rank]];
4865   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4866 
4867   current_space = free_space;
4868 
4869   /* determine symbolic info for each local row */
4870   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4871 
4872   for (k=0; k<merge->nrecv; k++) {
4873     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4874     nrows       = *buf_ri_k[k];
4875     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4876     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4877   }
4878 
4879   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4880   len  = 0;
4881   for (i=0; i<m; i++) {
4882     bnzi = 0;
4883     /* add local non-zero cols of this proc's seqmat into lnk */
4884     arow  = owners[rank] + i;
4885     anzi  = ai[arow+1] - ai[arow];
4886     aj    = a->j + ai[arow];
4887     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4888     bnzi += nlnk;
4889     /* add received col data into lnk */
4890     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4891       if (i == *nextrow[k]) { /* i-th row */
4892         anzi  = *(nextai[k]+1) - *nextai[k];
4893         aj    = buf_rj[k] + *nextai[k];
4894         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4895         bnzi += nlnk;
4896         nextrow[k]++; nextai[k]++;
4897       }
4898     }
4899     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4900 
4901     /* if free space is not available, make more free space */
4902     if (current_space->local_remaining<bnzi) {
4903       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4904       nspacedouble++;
4905     }
4906     /* copy data into free space, then initialize lnk */
4907     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4908     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4909 
4910     current_space->array           += bnzi;
4911     current_space->local_used      += bnzi;
4912     current_space->local_remaining -= bnzi;
4913 
4914     bi[i+1] = bi[i] + bnzi;
4915   }
4916 
4917   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4918 
4919   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4920   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4921   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4922 
4923   /* create symbolic parallel matrix B_mpi */
4924   /*---------------------------------------*/
4925   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4926   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4927   if (n==PETSC_DECIDE) {
4928     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4929   } else {
4930     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4931   }
4932   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4933   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4934   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4935   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4936   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4937 
4938   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4939   B_mpi->assembled  = PETSC_FALSE;
4940   merge->bi         = bi;
4941   merge->bj         = bj;
4942   merge->buf_ri     = buf_ri;
4943   merge->buf_rj     = buf_rj;
4944   merge->coi        = NULL;
4945   merge->coj        = NULL;
4946   merge->owners_co  = NULL;
4947 
4948   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4949 
4950   /* attach the supporting struct to B_mpi for reuse */
4951   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4952   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4953   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4954   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4955   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4956   *mpimat = B_mpi;
4957 
4958   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4959   PetscFunctionReturn(0);
4960 }
4961 
4962 /*@C
4963       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4964                  matrices from each processor
4965 
4966     Collective
4967 
4968    Input Parameters:
4969 +    comm - the communicator the parallel matrix will live on
4970 .    seqmat - the input sequential matrix
4971 .    m - number of local rows (or PETSC_DECIDE)
4972 .    n - number of local columns (or PETSC_DECIDE)
4973 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4974 
4975    Output Parameter:
4976 .    mpimat - the parallel matrix generated
4977 
4978     Level: advanced
4979 
4980    Notes:
4981      The dimensions of the sequential matrix in each processor MUST be the same.
4982      The input seqmat is kept in the container "Mat_Merge_SeqsToMPI", and will be
4983      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
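
     As a minimal sketch (seqmat assumed to be a SeqAIJ matrix with the same
     dimensions on every rank; error checking omitted):

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* after changing the values (same nonzero pattern) in seqmat: */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve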
4984 @*/
4985 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4986 {
4987   PetscErrorCode ierr;
4988   PetscMPIInt    size;
4989 
4990   PetscFunctionBegin;
4991   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4992   if (size == 1) {
4993     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4994     if (scall == MAT_INITIAL_MATRIX) {
4995       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4996     } else {
4997       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4998     }
4999     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5000     PetscFunctionReturn(0);
5001   }
5002   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5003   if (scall == MAT_INITIAL_MATRIX) {
5004     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5005   }
5006   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5007   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5008   PetscFunctionReturn(0);
5009 }
5010 
5011 /*@
5012      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5013           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5014           with MatGetSize()
5015 
5016     Not Collective
5017 
5018    Input Parameters:
5019 +    A - the matrix
5020 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5021 
5022    Output Parameter:
5023 .    A_loc - the local sequential matrix generated
5024 
5025     Level: developer
5026 
5027    Notes:
5028      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5029      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5030      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5031      modify the values of the returned A_loc.
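
     Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... work with A_loc ...; after the values of A change, refresh in place */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve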
5032 
5033 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5034 @*/
5035 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5036 {
5037   PetscErrorCode    ierr;
5038   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5039   Mat_SeqAIJ        *mat,*a,*b;
5040   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5041   const PetscScalar *aa,*ba,*aav,*bav;
5042   PetscScalar       *ca,*cam;
5043   PetscMPIInt       size;
5044   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5045   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5046   PetscBool         match;
5047 
5048   PetscFunctionBegin;
5049   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5050   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5051   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5052   if (size == 1) {
5053     if (scall == MAT_INITIAL_MATRIX) {
5054       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5055       *A_loc = mpimat->A;
5056     } else if (scall == MAT_REUSE_MATRIX) {
5057       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5058     }
5059     PetscFunctionReturn(0);
5060   }
5061 
5062   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5063   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5064   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5065   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5066   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5067   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5068   aa   = aav;
5069   ba   = bav;
5070   if (scall == MAT_INITIAL_MATRIX) {
5071     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5072     ci[0] = 0;
5073     for (i=0; i<am; i++) {
5074       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5075     }
5076     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5077     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5078     k    = 0;
5079     for (i=0; i<am; i++) {
5080       ncols_o = bi[i+1] - bi[i];
5081       ncols_d = ai[i+1] - ai[i];
5082       /* off-diagonal portion of A */
5083       for (jo=0; jo<ncols_o; jo++) {
5084         col = cmap[*bj];
5085         if (col >= cstart) break;
5086         cj[k]   = col; bj++;
5087         ca[k++] = *ba++;
5088       }
5089       /* diagonal portion of A */
5090       for (j=0; j<ncols_d; j++) {
5091         cj[k]   = cstart + *aj++;
5092         ca[k++] = *aa++;
5093       }
5094       /* off-diagonal portion of A */
5095       for (j=jo; j<ncols_o; j++) {
5096         cj[k]   = cmap[*bj++];
5097         ca[k++] = *ba++;
5098       }
5099     }
5100     /* put together the new matrix */
5101     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5102     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5103     /* Since these are PETSc arrays, change flags to free them as necessary. */
5104     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5105     mat->free_a  = PETSC_TRUE;
5106     mat->free_ij = PETSC_TRUE;
5107     mat->nonew   = 0;
5108   } else if (scall == MAT_REUSE_MATRIX) {
5109     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5110 #if defined(PETSC_HAVE_DEVICE)
5111     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5112 #endif
5113     ci = mat->i; cj = mat->j; cam = mat->a;
5114     for (i=0; i<am; i++) {
5115       /* off-diagonal portion of A */
5116       ncols_o = bi[i+1] - bi[i];
5117       for (jo=0; jo<ncols_o; jo++) {
5118         col = cmap[*bj];
5119         if (col >= cstart) break;
5120         *cam++ = *ba++; bj++;
5121       }
5122       /* diagonal portion of A */
5123       ncols_d = ai[i+1] - ai[i];
5124       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5125       /* off-diagonal portion of A */
5126       for (j=jo; j<ncols_o; j++) {
5127         *cam++ = *ba++; bj++;
5128       }
5129     }
5130   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5131   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5132   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5133   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5134   PetscFunctionReturn(0);
5135 }
5136 
5137 /*@
5138      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5139           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5140 
5141     Not Collective
5142 
5143    Input Parameters:
5144 +    A - the matrix
5145 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5146 
5147    Output Parameter:
5148 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5149 -    A_loc - the local sequential matrix generated
5150 
5151     Level: developer
5152 
5153    Notes:
5154      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
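
     Example usage (a minimal sketch; glob maps the columns of A_loc back to global column indices):
.vb
     Mat A_loc;
     IS  glob;
     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc);CHKERRQ(ierr);
     /* ... work with A_loc and glob ... */
     ierr = ISDestroy(&glob);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve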
5155 
5156 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5157 
5158 @*/
5159 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5160 {
5161   PetscErrorCode ierr;
5162   Mat            Ao,Ad;
5163   const PetscInt *cmap;
5164   PetscMPIInt    size;
5165   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5166 
5167   PetscFunctionBegin;
5168   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5169   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5170   if (size == 1) {
5171     if (scall == MAT_INITIAL_MATRIX) {
5172       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5173       *A_loc = Ad;
5174     } else if (scall == MAT_REUSE_MATRIX) {
5175       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5176     }
5177     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5178     PetscFunctionReturn(0);
5179   }
5180   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5181   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5182   if (f) {
5183     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5184   } else {
5185     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5186     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5187     Mat_SeqAIJ        *c;
5188     PetscInt          *ai = a->i, *aj = a->j;
5189     PetscInt          *bi = b->i, *bj = b->j;
5190     PetscInt          *ci,*cj;
5191     const PetscScalar *aa,*ba;
5192     PetscScalar       *ca;
5193     PetscInt          i,j,am,dn,on;
5194 
5195     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5196     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5197     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5198     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5199     if (scall == MAT_INITIAL_MATRIX) {
5200       PetscInt k;
5201       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5202       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5203       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5204       ci[0] = 0;
5205       for (i=0,k=0; i<am; i++) {
5206         const PetscInt ncols_o = bi[i+1] - bi[i];
5207         const PetscInt ncols_d = ai[i+1] - ai[i];
5208         ci[i+1] = ci[i] + ncols_o + ncols_d;
5209         /* diagonal portion of A */
5210         for (j=0; j<ncols_d; j++,k++) {
5211           cj[k] = *aj++;
5212           ca[k] = *aa++;
5213         }
5214         /* off-diagonal portion of A */
5215         for (j=0; j<ncols_o; j++,k++) {
5216           cj[k] = dn + *bj++;
5217           ca[k] = *ba++;
5218         }
5219       }
5220       /* put together the new matrix */
5221       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5222       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5223       /* Since these are PETSc arrays, change flags to free them as necessary. */
5224       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5225       c->free_a  = PETSC_TRUE;
5226       c->free_ij = PETSC_TRUE;
5227       c->nonew   = 0;
5228       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5229     } else if (scall == MAT_REUSE_MATRIX) {
5230 #if defined(PETSC_HAVE_DEVICE)
5231       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5232 #endif
5233       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5234       ca = c->a;
5235       for (i=0; i<am; i++) {
5236         const PetscInt ncols_d = ai[i+1] - ai[i];
5237         const PetscInt ncols_o = bi[i+1] - bi[i];
5238         /* diagonal portion of A */
5239         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5240         /* off-diagonal portion of A */
5241         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5242       }
5243     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5244     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5245     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5246     if (glob) {
5247       PetscInt cst, *gidx;
5248 
5249       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5250       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5251       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5252       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5253       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5254     }
5255   }
5256   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5257   PetscFunctionReturn(0);
5258 }
5259 
5260 /*@C
5261      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5262 
5263     Not Collective
5264 
5265    Input Parameters:
5266 +    A - the matrix
5267 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5268 -    row, col - index sets of rows and columns to extract (or NULL)
5269 
5270    Output Parameter:
5271 .    A_loc - the local sequential matrix generated
5272 
5273     Level: developer
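
   Example usage (a minimal sketch; passing NULL for row and col selects all local rows and the nonzero columns):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
   /* ... work with A_loc ... */
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve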
5274 
5275 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5276 
5277 @*/
5278 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5279 {
5280   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5281   PetscErrorCode ierr;
5282   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5283   IS             isrowa,iscola;
5284   Mat            *aloc;
5285   PetscBool      match;
5286 
5287   PetscFunctionBegin;
5288   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5289   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5290   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5291   if (!row) {
5292     start = A->rmap->rstart; end = A->rmap->rend;
5293     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5294   } else {
5295     isrowa = *row;
5296   }
5297   if (!col) {
5298     start = A->cmap->rstart;
5299     cmap  = a->garray;
5300     nzA   = a->A->cmap->n;
5301     nzB   = a->B->cmap->n;
5302     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5303     ncols = 0;
5304     for (i=0; i<nzB; i++) {
5305       if (cmap[i] < start) idx[ncols++] = cmap[i];
5306       else break;
5307     }
5308     imark = i;
5309     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5310     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5311     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5312   } else {
5313     iscola = *col;
5314   }
5315   if (scall != MAT_INITIAL_MATRIX) {
5316     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5317     aloc[0] = *A_loc;
5318   }
5319   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5320   if (!col) { /* attach global id of condensed columns */
5321     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5322   }
5323   *A_loc = aloc[0];
5324   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5325   if (!row) {
5326     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5327   }
5328   if (!col) {
5329     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5330   }
5331   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5332   PetscFunctionReturn(0);
5333 }
5334 
5335 /*
5336  * Create a sequential AIJ matrix based on row indices: the entire row (all of its columns) is extracted once a row index matches.
5337  * A row could be local or remote. The routine is designed to be memory scalable, so nothing it
5338  * allocates is proportional to a global size.
5339  * */
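/*
 * The communication below follows the standard two-phase PetscSF pattern; a minimal
 * sketch of that pattern (names here are illustrative, not part of this routine):
 *
 *   PetscSF sf;
 *   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
 *   ierr = PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
 *   ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
 *   ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
 *   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
 */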
5340 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5341 {
5342   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5343   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5344   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5345   PetscMPIInt              owner;
5346   PetscSFNode              *iremote,*oiremote;
5347   const PetscInt           *lrowindices;
5348   PetscErrorCode           ierr;
5349   PetscSF                  sf,osf;
5350   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5351   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5352   MPI_Comm                 comm;
5353   ISLocalToGlobalMapping   mapping;
5354 
5355   PetscFunctionBegin;
5356   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5357   /* plocalsize is the number of roots
5358    * nrows is the number of leaves
5359    * */
5360   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5361   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5362   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5363   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5364   for (i=0;i<nrows;i++) {
5365     /* Find a remote index and an owner for a row
5366      * The row could be local or remote
5367      * */
5368     owner = 0;
5369     lidx  = 0;
5370     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5371     iremote[i].index = lidx;
5372     iremote[i].rank  = owner;
5373   }
5374   /* Create SF to communicate how many nonzero columns for each row */
5375   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5376   /* SF will figure out the number of nonzero columns for each row, and their
5377    * offsets
5378    * */
5379   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5380   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5381   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5382 
5383   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5384   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5385   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5386   roffsets[0] = 0;
5387   roffsets[1] = 0;
5388   for (i=0;i<plocalsize;i++) {
5389     /* diag */
5390     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5391     /* off diag */
5392     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5393     /* compute offsets so that we know the relative location of each row */
5394     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5395     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5396   }
5397   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5398   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5399   /* 'r' means root, and 'l' means leaf */
5400   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5401   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5402   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5403   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5404   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5405   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5406   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5407   dntotalcols = 0;
5408   ontotalcols = 0;
5409   ncol = 0;
5410   for (i=0;i<nrows;i++) {
5411     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5412     ncol = PetscMax(pnnz[i],ncol);
5413     /* diag */
5414     dntotalcols += nlcols[i*2+0];
5415     /* off diag */
5416     ontotalcols += nlcols[i*2+1];
5417   }
5418   /* We do not need to figure out the exact number of columns
5419    * since all the calculations will be done by going through the raw data
5420    * */
5421   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5422   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5423   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5424   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5425   /* diag */
5426   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5427   /* off diag */
5428   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5429   /* diag */
5430   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5431   /* off diag */
5432   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5433   dntotalcols = 0;
5434   ontotalcols = 0;
5435   ntotalcols  = 0;
5436   for (i=0;i<nrows;i++) {
5437     owner = 0;
5438     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5439     /* Set iremote for diag matrix */
5440     for (j=0;j<nlcols[i*2+0];j++) {
5441       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5442       iremote[dntotalcols].rank    = owner;
5443       /* P_oth is SeqAIJ, so ilocal needs to point to the beginning of the memory */
5444       ilocal[dntotalcols++]        = ntotalcols++;
5445     }
5446     /* off diag */
5447     for (j=0;j<nlcols[i*2+1];j++) {
5448       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5449       oiremote[ontotalcols].rank    = owner;
5450       oilocal[ontotalcols++]        = ntotalcols++;
5451     }
5452   }
5453   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5454   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5455   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5456   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5457   /* P serves as the roots and P_oth as the leaves
5458    * Diag matrix
5459    * */
5460   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5461   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5462   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5463 
5464   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5465   /* Off diag */
5466   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5467   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5468   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5469   /* We operate on the matrix internal data for saving memory */
5470   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5471   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5472   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5473   /* Convert to global indices for diag matrix */
5474   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5475   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5476   /* We want P_oth to store global indices */
5477   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5478   /* Use memory scalable approach */
5479   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5480   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5481   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5482   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5483   /* Convert back to local indices */
5484   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5485   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5486   nout = 0;
5487   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5488   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5489   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5490   /* Exchange values */
5491   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5492   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5493   /* Stop PETSc from shrinking memory */
5494   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5495   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5496   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5497   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5498   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5499   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5500   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5501   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5502   PetscFunctionReturn(0);
5503 }
5504 
5505 /*
5506  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A
5507  * This supports MPIAIJ and MAIJ
5508  * */
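/*
 * Worked example of the dof-based deduplication below (illustrative values only):
 * with dof = 2 and a->garray = {4,5,8,9}, the keys garray[i]/dof are {2,2,4,4},
 * so the unique P rows requested are {2,4} and mapping = {0,0,1,1}.
 */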
5509 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5510 {
5511   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5512   Mat_SeqAIJ            *p_oth;
5513   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5514   IS                    rows,map;
5515   PetscHMapI            hamp;
5516   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5517   MPI_Comm              comm;
5518   PetscSF               sf,osf;
5519   PetscBool             has;
5520   PetscErrorCode        ierr;
5521 
5522   PetscFunctionBegin;
5523   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5524   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5525   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5526    *  and then create a submatrix (that often is an overlapping matrix)
5527    * */
5528   if (reuse == MAT_INITIAL_MATRIX) {
5529     /* Use a hash table to figure out unique keys */
5530     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5531     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5532     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5533     count = 0;
5534     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5535     for (i=0;i<a->B->cmap->n;i++) {
5536       key  = a->garray[i]/dof;
5537       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5538       if (!has) {
5539         mapping[i] = count;
5540         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5541       } else {
5542         /* Current 'i' has the same key as the previous step */
5543         mapping[i] = count-1;
5544       }
5545     }
5546     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5547     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5548     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5549     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5550     off = 0;
5551     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5552     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5553     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5554     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5555     /* In case the matrix was already created but the user wants to recreate it */
5556     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5557     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5558     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5559     ierr = ISDestroy(&map);CHKERRQ(ierr);
5560     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5561   } else if (reuse == MAT_REUSE_MATRIX) {
5562     /* If the matrix was already created, we simply update the values using the SF objects
5563      * that were attached to the matrix earlier.
5564      * */
5565     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5566     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5567     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5568     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5569     /* Update values in place */
5570     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5571     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5572     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5573     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5574   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5575   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5576   PetscFunctionReturn(0);
5577 }
5578 
5579 /*@C
5580     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5581 
5582     Collective on Mat
5583 
5584    Input Parameters:
5585 +    A,B - the matrices in mpiaij format
5586 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5587 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5588 
5589    Output Parameter:
5590 +    rowb, colb - index sets of rows and columns of B to extract
5591 -    B_seq - the sequential matrix generated
5592 
5593     Level: developer
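
   Example usage (a minimal sketch; the index sets returned by the initial call must be passed back when reusing):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq = NULL;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   /* ... after the values of B change ... */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve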
5594 
5595 @*/
5596 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5597 {
5598   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5599   PetscErrorCode ierr;
5600   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5601   IS             isrowb,iscolb;
5602   Mat            *bseq=NULL;
5603 
5604   PetscFunctionBegin;
5605   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5606     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5607   }
5608   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5609 
5610   if (scall == MAT_INITIAL_MATRIX) {
5611     start = A->cmap->rstart;
5612     cmap  = a->garray;
5613     nzA   = a->A->cmap->n;
5614     nzB   = a->B->cmap->n;
5615     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5616     ncols = 0;
5617     for (i=0; i<nzB; i++) {  /* row < local row index */
5618       if (cmap[i] < start) idx[ncols++] = cmap[i];
5619       else break;
5620     }
5621     imark = i;
5622     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5623     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5624     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5625     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5626   } else {
5627     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5628     isrowb  = *rowb; iscolb = *colb;
5629     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5630     bseq[0] = *B_seq;
5631   }
5632   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5633   *B_seq = bseq[0];
5634   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5635   if (!rowb) {
5636     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5637   } else {
5638     *rowb = isrowb;
5639   }
5640   if (!colb) {
5641     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5642   } else {
5643     *colb = iscolb;
5644   }
5645   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5646   PetscFunctionReturn(0);
5647 }
5648 
5649 /*
5650     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5651     of the OFF-DIAGONAL portion of local A
5652 
5653     Collective on Mat
5654 
5655    Input Parameters:
5656 +    A,B - the matrices in mpiaij format
5657 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5658 
5659    Output Parameter:
5660 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5661 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5662 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5663 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5664 
5665     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5666      for this matrix. This is not desirable.
5667 
5668     Level: developer
5669 
5670 */
5671 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5672 {
5673   PetscErrorCode         ierr;
5674   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5675   Mat_SeqAIJ             *b_oth;
5676   VecScatter             ctx;
5677   MPI_Comm               comm;
5678   const PetscMPIInt      *rprocs,*sprocs;
5679   const PetscInt         *srow,*rstarts,*sstarts;
5680   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5681   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5682   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5683   MPI_Request            *rwaits = NULL,*swaits = NULL;
5684   MPI_Status             rstatus;
5685   PetscMPIInt            size,tag,rank,nsends_mpi,nrecvs_mpi;
5686   PETSC_UNUSED PetscMPIInt jj;
5687 
5688   PetscFunctionBegin;
5689   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5690   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5691 
5692   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5693     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5694   }
5695   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5696   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5697 
5698   if (size == 1) {
5699     if (startsj_s) *startsj_s = NULL;
5700     if (bufa_ptr)  *bufa_ptr  = NULL;
5701     *B_oth    = NULL;
5702     PetscFunctionReturn(0);
5703   }
5704 
5705   ctx = a->Mvctx;
5706   tag = ((PetscObject)ctx)->tag;
5707 
5708   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5709   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5710   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5711   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5712   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5713   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5714 
5715   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5716   if (scall == MAT_INITIAL_MATRIX) {
5717     /* i-array */
5718     /*---------*/
5719     /*  post receives */
5720     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5721     for (i=0; i<nrecvs; i++) {
5722       rowlen = rvalues + rstarts[i]*rbs;
5723       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5724       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5725     }
5726 
5727     /* pack the outgoing message */
5728     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5729 
5730     sstartsj[0] = 0;
5731     rstartsj[0] = 0;
5732     len         = 0; /* total length of j or a array to be sent */
5733     if (nsends) {
5734       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5735       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5736     }
5737     for (i=0; i<nsends; i++) {
5738       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5739       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5740       for (j=0; j<nrows; j++) {
5741         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5742         for (l=0; l<sbs; l++) {
5743           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5744 
5745           rowlen[j*sbs+l] = ncols;
5746 
5747           len += ncols;
5748           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5749         }
5750         k++;
5751       }
5752       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5753 
5754       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5755     }
5756     /* recvs and sends of i-array are completed */
5757     i = nrecvs;
5758     while (i--) {
5759       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5760     }
5761     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5762     ierr = PetscFree(svalues);CHKERRQ(ierr);
5763 
5764     /* allocate buffers for sending j and a arrays */
5765     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5766     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5767 
5768     /* create i-array of B_oth */
5769     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5770 
5771     b_othi[0] = 0;
5772     len       = 0; /* total length of j or a array to be received */
5773     k         = 0;
5774     for (i=0; i<nrecvs; i++) {
5775       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5776       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5777       for (j=0; j<nrows; j++) {
5778         b_othi[k+1] = b_othi[k] + rowlen[j];
5779         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5780         k++;
5781       }
5782       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5783     }
5784     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5785 
5786     /* allocate space for the j and a arrays of B_oth */
5787     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5788     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5789 
5790     /* j-array */
5791     /*---------*/
5792     /*  post receives of j-array */
5793     for (i=0; i<nrecvs; i++) {
5794       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5795       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5796     }
5797 
5798     /* pack the outgoing message j-array */
5799     if (nsends) k = sstarts[0];
5800     for (i=0; i<nsends; i++) {
5801       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5802       bufJ  = bufj+sstartsj[i];
5803       for (j=0; j<nrows; j++) {
5804         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5805         for (ll=0; ll<sbs; ll++) {
5806           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5807           for (l=0; l<ncols; l++) {
5808             *bufJ++ = cols[l];
5809           }
5810           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5811         }
5812       }
5813       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5814     }
5815 
5816     /* recvs and sends of j-array are completed */
5817     i = nrecvs;
5818     while (i--) {
5819       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5820     }
5821     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5822   } else if (scall == MAT_REUSE_MATRIX) {
5823     sstartsj = *startsj_s;
5824     rstartsj = *startsj_r;
5825     bufa     = *bufa_ptr;
5826     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5827     b_otha   = b_oth->a;
5828 #if defined(PETSC_HAVE_DEVICE)
5829     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5830 #endif
5831   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5832 
5833   /* a-array */
5834   /*---------*/
5835   /*  post receives of a-array */
5836   for (i=0; i<nrecvs; i++) {
5837     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5838     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5839   }
5840 
5841   /* pack the outgoing message a-array */
5842   if (nsends) k = sstarts[0];
5843   for (i=0; i<nsends; i++) {
5844     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5845     bufA  = bufa+sstartsj[i];
5846     for (j=0; j<nrows; j++) {
5847       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5848       for (ll=0; ll<sbs; ll++) {
5849         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5850         for (l=0; l<ncols; l++) {
5851           *bufA++ = vals[l];
5852         }
5853         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5854       }
5855     }
5856     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5857   }
5858   /* recvs and sends of a-array are completed */
5859   i = nrecvs;
5860   while (i--) {
5861     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5862   }
5863   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5864   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5865 
5866   if (scall == MAT_INITIAL_MATRIX) {
5867     /* put together the new matrix */
5868     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5869 
5870     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5871     /* Since these are PETSc arrays, change flags to free them as necessary. */
5872     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5873     b_oth->free_a  = PETSC_TRUE;
5874     b_oth->free_ij = PETSC_TRUE;
5875     b_oth->nonew   = 0;
5876 
5877     ierr = PetscFree(bufj);CHKERRQ(ierr);
5878     if (!startsj_s || !bufa_ptr) {
5879       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5880       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5881     } else {
5882       *startsj_s = sstartsj;
5883       *startsj_r = rstartsj;
5884       *bufa_ptr  = bufa;
5885     }
5886   }
5887 
5888   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5889   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5890   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5891   PetscFunctionReturn(0);
5892 }
5893 
5894 /*@C
5895   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5896 
5897   Not Collective
5898 
5899   Input Parameters:
5900 . A - The matrix in mpiaij format
5901 
5902   Output Parameter:
5903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5904 . colmap - A map from global column index to local index into lvec
5905 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5906 
5907   Level: developer
5908 
5909 @*/
5910 #if defined(PETSC_USE_CTABLE)
5911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5912 #else
5913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5914 #endif
5915 {
5916   Mat_MPIAIJ *a;
5917 
5918   PetscFunctionBegin;
5919   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5920   PetscValidPointer(lvec, 2);
5921   PetscValidPointer(colmap, 3);
5922   PetscValidPointer(multScatter, 4);
5923   a = (Mat_MPIAIJ*) A->data;
5924   if (lvec) *lvec = a->lvec;
5925   if (colmap) *colmap = a->colmap;
5926   if (multScatter) *multScatter = a->Mvctx;
5927   PetscFunctionReturn(0);
5928 }
5929 
5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5933 #if defined(PETSC_HAVE_MKL_SPARSE)
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5935 #endif
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5938 #if defined(PETSC_HAVE_ELEMENTAL)
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5940 #endif
5941 #if defined(PETSC_HAVE_SCALAPACK)
5942 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5943 #endif
5944 #if defined(PETSC_HAVE_HYPRE)
5945 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5946 #endif
5947 #if defined(PETSC_HAVE_CUDA)
5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5949 #endif
5950 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5952 #endif
5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5954 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5955 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5956 
5957 /*
5958     Computes (B'*A')' since computing B*A directly is untenable
5959 
5960                n                       p                          p
5961         [             ]       [             ]         [                 ]
5962       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5963         [             ]       [             ]         [                 ]
5964 
5965 */
5966 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5967 {
5968   PetscErrorCode ierr;
5969   Mat            At,Bt,Ct;
5970 
5971   PetscFunctionBegin;
5972   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5973   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5974   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5975   ierr = MatDestroy(&At);CHKERRQ(ierr);
5976   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5977   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5978   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5979   PetscFunctionReturn(0);
5980 }
5981 
5982 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5983 {
5984   PetscErrorCode ierr;
5985   PetscBool      cisdense;
5986 
5987   PetscFunctionBegin;
5988   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5989   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5990   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5991   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5992   if (!cisdense) {
5993     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5994   }
5995   ierr = MatSetUp(C);CHKERRQ(ierr);
5996 
5997   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5998   PetscFunctionReturn(0);
5999 }
6000 
6001 /* ----------------------------------------------------------------*/
6002 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6003 {
6004   Mat_Product *product = C->product;
6005   Mat         A = product->A,B=product->B;
6006 
6007   PetscFunctionBegin;
6008   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6009     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6010 
6011   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6012   C->ops->productsymbolic = MatProductSymbolic_AB;
6013   PetscFunctionReturn(0);
6014 }
6015 
6016 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6017 {
6018   PetscErrorCode ierr;
6019   Mat_Product    *product = C->product;
6020 
6021   PetscFunctionBegin;
6022   if (product->type == MATPRODUCT_AB) {
6023     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6024   }
6025   PetscFunctionReturn(0);
6026 }
6027 /* ----------------------------------------------------------------*/
6028 
6029 /*MC
6030    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6031 
6032    Options Database Keys:
6033 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6034 
6035    Level: beginner
6036 
6037    Notes:
6038     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6039     in this case the values associated with the rows and columns one passes in are set to zero
6040     in the matrix
6041 
6042     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6043     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
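
    A minimal sketch of inserting structural zeros by passing NULL for the values, as described above
    (A, nrows, rows, ncols, and cols are illustrative names):
.vb
    ierr = MatSetValues(A,nrows,rows,ncols,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
.ve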
6044 
6045 .seealso: MatCreateAIJ()
6046 M*/
6047 
6048 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6049 {
6050   Mat_MPIAIJ     *b;
6051   PetscErrorCode ierr;
6052   PetscMPIInt    size;
6053 
6054   PetscFunctionBegin;
6055   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6056 
6057   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6058   B->data       = (void*)b;
6059   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6060   B->assembled  = PETSC_FALSE;
6061   B->insertmode = NOT_SET_VALUES;
6062   b->size       = size;
6063 
6064   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6065 
6066   /* build cache for off array entries formed */
6067   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6068 
6069   b->donotstash  = PETSC_FALSE;
6070   b->colmap      = NULL;
6071   b->garray      = NULL;
6072   b->roworiented = PETSC_TRUE;
6073 
6074   /* stuff used for matrix vector multiply */
6075   b->lvec  = NULL;
6076   b->Mvctx = NULL;
6077 
6078   /* stuff for MatGetRow() */
6079   b->rowindices   = NULL;
6080   b->rowvalues    = NULL;
6081   b->getrowactive = PETSC_FALSE;
6082 
6083   /* flexible pointer used in CUSPARSE classes */
6084   b->spptr = NULL;
6085 
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6096 #if defined(PETSC_HAVE_CUDA)
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6098 #endif
6099 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6101 #endif
6102 #if defined(PETSC_HAVE_MKL_SPARSE)
6103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6104 #endif
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6109 #if defined(PETSC_HAVE_ELEMENTAL)
6110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6111 #endif
6112 #if defined(PETSC_HAVE_SCALAPACK)
6113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6114 #endif
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6117 #if defined(PETSC_HAVE_HYPRE)
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6120 #endif
6121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6122   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6123   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6124   PetscFunctionReturn(0);
6125 }
6126 
6127 /*@C
6128      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6129          and "off-diagonal" part of the matrix in CSR format.
6130 
6131    Collective
6132 
6133    Input Parameters:
6134 +  comm - MPI communicator
6135 .  m - number of local rows (Cannot be PETSC_DECIDE)
6136 .  n - This value should be the same as the local size used in creating the
6137        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6138        calculated if N is given). For square matrices n is almost always m.
6139 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6140 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6141 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6142 .   j - column indices
6143 .   a - matrix values
6144 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6145 .   oj - column indices
6146 -   oa - matrix values
6147 
6148    Output Parameter:
6149 .   mat - the matrix
6150 
6151    Level: advanced
6152 
6153    Notes:
6154        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6155        must free the arrays once the matrix has been destroyed and not before.
6156 
6157        The i and j indices are 0 based
6158 
6159        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6160 
6161        This sets local rows and cannot be used to set off-processor values.
6162 
6163        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6164        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6165        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6166        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6167        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6168        communication if it is known that only local entries will be set.
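
       A minimal sketch of the recommended MatSetValues()-based assembly (variable names here are illustrative):
.vb
       Mat A;
       ierr = MatCreateAIJ(comm,m,n,M,N,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
       ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve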
6169 
6170 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6171           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6172 @*/
6173 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6174 {
6175   PetscErrorCode ierr;
6176   Mat_MPIAIJ     *maij;
6177 
6178   PetscFunctionBegin;
6179   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6180   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6181   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6182   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6183   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6184   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6185   maij = (Mat_MPIAIJ*) (*mat)->data;
6186 
6187   (*mat)->preallocated = PETSC_TRUE;
6188 
6189   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6190   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6191 
6192   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6193   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6194 
6195   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6196   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6197   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6198   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6199 
6200   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6201   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6202   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6203   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6204   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6205   PetscFunctionReturn(0);
6206 }
6207 
6208 /*
6209     Special version for direct calls from Fortran
6210 */
6211 #include <petsc/private/fortranimpl.h>
6212 
6213 /* Change these macros so they can be used in a void function */
6214 #undef CHKERRQ
6215 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6216 #undef SETERRQ2
6217 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6218 #undef SETERRQ3
6219 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6220 #undef SETERRQ
6221 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
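
/* A hedged sketch of the effect: inside the void Fortran stub below, a statement such as

     ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);

   now expands CHKERRQ(ierr) to CHKERRABORT(PETSC_COMM_WORLD,ierr), which aborts on error
   instead of attempting "return ierr", which would not compile in a function returning void. */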
6222 
6223 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6224 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6225 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6226 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6227 #else
6228 #endif
6229 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6230 {
6231   Mat            mat  = *mmat;
6232   PetscInt       m    = *mm, n = *mn;
6233   InsertMode     addv = *maddv;
6234   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6235   PetscScalar    value;
6236   PetscErrorCode ierr;
6237 
6238   MatCheckPreallocated(mat,1);
6239   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6240   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6241   {
6242     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6243     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6244     PetscBool roworiented = aij->roworiented;
6245 
6246     /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
6247     Mat        A                    = aij->A;
6248     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6249     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6250     MatScalar  *aa                  = a->a;
6251     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6252     Mat        B                    = aij->B;
6253     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6254     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6255     MatScalar  *ba                  = b->a;
6256     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6257      * cannot use "#if defined" inside a macro. */
6258     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6259 
6260     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6261     PetscInt  nonew = a->nonew;
6262     MatScalar *ap1,*ap2;
6263 
6264     PetscFunctionBegin;
6265     for (i=0; i<m; i++) {
6266       if (im[i] < 0) continue;
6267       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6268       if (im[i] >= rstart && im[i] < rend) {
6269         row      = im[i] - rstart;
6270         lastcol1 = -1;
6271         rp1      = aj + ai[row];
6272         ap1      = aa + ai[row];
6273         rmax1    = aimax[row];
6274         nrow1    = ailen[row];
6275         low1     = 0;
6276         high1    = nrow1;
6277         lastcol2 = -1;
6278         rp2      = bj + bi[row];
6279         ap2      = ba + bi[row];
6280         rmax2    = bimax[row];
6281         nrow2    = bilen[row];
6282         low2     = 0;
6283         high2    = nrow2;
6284 
6285         for (j=0; j<n; j++) {
6286           if (roworiented) value = v[i*n+j];
6287           else value = v[i+j*m];
6288           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6289           if (in[j] >= cstart && in[j] < cend) {
6290             col = in[j] - cstart;
6291             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6292 #if defined(PETSC_HAVE_DEVICE)
6293             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6294 #endif
6295           } else if (in[j] < 0) continue;
6296           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6297             /* the extra braces around SETERRQ2() are required with --with-errorchecking=0 because of the 'else' clause that follows */
6298             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6299           } else {
6300             if (mat->was_assembled) {
6301               if (!aij->colmap) {
6302                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6303               }
6304 #if defined(PETSC_USE_CTABLE)
6305               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6306               col--;
6307 #else
6308               col = aij->colmap[in[j]] - 1;
6309 #endif
6310               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6311                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6312                 col  =  in[j];
6313                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6314                 B        = aij->B;
6315                 b        = (Mat_SeqAIJ*)B->data;
6316                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6317                 rp2      = bj + bi[row];
6318                 ap2      = ba + bi[row];
6319                 rmax2    = bimax[row];
6320                 nrow2    = bilen[row];
6321                 low2     = 0;
6322                 high2    = nrow2;
6323                 bm       = aij->B->rmap->n;
6324                 ba       = b->a;
6325                 inserted = PETSC_FALSE;
6326               }
6327             } else col = in[j];
6328             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6329 #if defined(PETSC_HAVE_DEVICE)
6330             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6331 #endif
6332           }
6333         }
6334       } else if (!aij->donotstash) {
6335         if (roworiented) {
6336           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6337         } else {
6338           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6339         }
6340       }
6341     }
6342   }
6343   PetscFunctionReturnVoid();
6344 }
6345 
6346 typedef struct {
6347   Mat       *mp;    /* intermediate products */
6348   PetscBool *mptmp; /* is the intermediate product temporary? */
6349   PetscInt  cp;     /* number of intermediate products */
6350 
6351   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6352   PetscInt    *startsj_s,*startsj_r;
6353   PetscScalar *bufa;
6354   Mat         P_oth;
6355 
6356   /* may take advantage of merging product->B */
6357   Mat Bloc;
6358 
6359   /* cuSPARSE does not support splitting the symbolic and numeric phases,
6360      so when api_user is true we do not need to update the numerical values
6361      of the temporary storage */
6362   PetscBool reusesym;
6363 
6364   /* support for COO values insertion */
6365   PetscScalar  *coo_v,*coo_w;
6366   PetscInt     **own;
6367   PetscInt     **off;
6368   PetscBool    hasoffproc; /* if true, values are inserted off-process (i.e. for AtB or PtAP) */
6369   PetscSF      sf; /* used for off-process values insertion and memory-type-aware allocation */
6370   PetscMemType mtype;
6371 
6372   /* customization */
6373   PetscBool abmerge;
6374   PetscBool P_oth_bind;
6375 } MatMatMPIAIJBACKEND;
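
/* Lifecycle (sketch): MatProductSymbolic_MPIAIJBACKEND() allocates this struct and stores it in
   C->product->data, MatProductNumeric_MPIAIJBACKEND() consumes it on each numeric (re)computation,
   and MatDestroy_MatMatMPIAIJBACKEND() releases it. */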
6376 
6377 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6378 {
6379   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6380   PetscInt            i;
6381   PetscErrorCode      ierr;
6382 
6383   PetscFunctionBegin;
6384   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6385   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6386   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6387   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6388   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6389   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6390   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6391   for (i = 0; i < mmdata->cp; i++) {
6392     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6393   }
6394   ierr = PetscFree(mmdata->mp);CHKERRQ(ierr);
6395   ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr);
6396   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6397   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6398   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6399   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6400   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6401   PetscFunctionReturn(0);
6402 }
6403 
6404 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6405 {
6406   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6407   PetscErrorCode ierr;
6408 
6409   PetscFunctionBegin;
6410   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6411   if (f) {
6412     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6413   } else {
6414     const PetscScalar *vv;
6415 
6416     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6417     if (n && idx) {
6418       PetscScalar    *w = v;
6419       const PetscInt *oi = idx;
6420       PetscInt       j;
6421 
6422       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6423     } else {
6424       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6425     }
6426     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6427   }
6428   PetscFunctionReturn(0);
6429 }
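
/* Example (hedged sketch): gather three selected entries of the value array of a SeqAIJ matrix A
   into a user buffer; a type-specific implementation is used when one is registered, otherwise the
   host fallback above is taken:

     const PetscInt idx[3] = {0, 5, 7};
     PetscScalar    vals[3];
     ierr = MatSeqAIJCopySubArray(A,3,idx,vals);CHKERRQ(ierr);
*/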
6430 
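/* Numeric phase (sketch of the flow): refresh the temporary operands (P_oth and Bloc) unless the
   values from the symbolic phase can be reused, run the numeric op of every intermediate product,
   copy the intermediate values into the COO buffers (coo_v for locally owned entries, coo_w for
   off-process ones), gather the off-process contributions through the SF, and finally assemble
   C with MatSetValuesCOO(). */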
6431 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6432 {
6433   MatMatMPIAIJBACKEND *mmdata;
6434   PetscInt            i,n_d,n_o;
6435   PetscErrorCode      ierr;
6436 
6437   PetscFunctionBegin;
6438   MatCheckProduct(C,1);
6439   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6440   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6441   if (!mmdata->reusesym) { /* update temporary matrices */
6442     if (mmdata->P_oth) {
6443       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6444     }
6445     if (mmdata->Bloc) {
6446       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6447     }
6448   }
6449   mmdata->reusesym = PETSC_FALSE;
6450 
6451   for (i = 0; i < mmdata->cp; i++) {
6452     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6453     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6454   }
6455   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6456     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6457 
6458     if (mmdata->mptmp[i]) continue;
6459     if (noff) {
6460       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6461 
6462       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6463       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6464       n_o += noff;
6465       n_d += nown;
6466     } else {
6467       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6468 
6469       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6470       n_d += mm->nz;
6471     }
6472   }
6473   if (mmdata->hasoffproc) { /* off-process insertion */
6474     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6475     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6476   }
6477   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6478   PetscFunctionReturn(0);
6479 }
6480 
6481 /* Support for Pt * A, A * P, or Pt * A * P */
6482 #define MAX_NUMBER_INTERMEDIATE 4
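/* For each intermediate product mp[i], rmapt[i]/cmapt[i] record how its local row/column indices
   translate to global indices of C: -1 = unset, 0 = indices are already global, 1 = add the owned
   range start of C (C->rmap->rstart or C->cmap->rstart), 2 = translate through the
   rmapa[i]/cmapa[i] arrays (e.g. p->garray or the indices of the merged local matrix) */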
6483 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6484 {
6485   Mat_Product            *product = C->product;
6486   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE];
6487   Mat_MPIAIJ             *a,*p;
6488   MatMatMPIAIJBACKEND    *mmdata;
6489   ISLocalToGlobalMapping P_oth_l2g = NULL;
6490   IS                     glob = NULL;
6491   const char             *prefix;
6492   char                   pprefix[256];
6493   const PetscInt         *globidx,*P_oth_idx;
6494   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE];
6495   PetscInt               cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j;
6496   MatProductType         ptype;
6497   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6498   PetscMPIInt            size;
6499   PetscErrorCode         ierr;
6500 
6501   PetscFunctionBegin;
6502   MatCheckProduct(C,1);
6503   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6504   ptype = product->type;
6505   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6506   switch (ptype) {
6507   case MATPRODUCT_AB:
6508     A = product->A;
6509     P = product->B;
6510     m = A->rmap->n;
6511     n = P->cmap->n;
6512     M = A->rmap->N;
6513     N = P->cmap->N;
6514     break;
6515   case MATPRODUCT_AtB:
6516     P = product->A;
6517     A = product->B;
6518     m = P->cmap->n;
6519     n = A->cmap->n;
6520     M = P->cmap->N;
6521     N = A->cmap->N;
6522     hasoffproc = PETSC_TRUE;
6523     break;
6524   case MATPRODUCT_PtAP:
6525     A = product->A;
6526     P = product->B;
6527     m = P->cmap->n;
6528     n = P->cmap->n;
6529     M = P->cmap->N;
6530     N = P->cmap->N;
6531     hasoffproc = PETSC_TRUE;
6532     break;
6533   default:
6534     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6535   }
6536   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6537   if (size == 1) hasoffproc = PETSC_FALSE;
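  /* AtB and PtAP generate contributions to rows of C owned by other processes; on a single
     process everything is local, so the off-process insertion machinery is not needed */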
6538 
6539   /* defaults */
6540   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6541     mp[i]    = NULL;
6542     mptmp[i] = PETSC_FALSE;
6543     rmapt[i] = -1;
6544     cmapt[i] = -1;
6545     rmapa[i] = NULL;
6546     cmapa[i] = NULL;
6547   }
6548 
6549   /* customization */
6550   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6551   mmdata->reusesym = product->api_user;
6552   if (ptype == MATPRODUCT_AB) {
6553     if (product->api_user) {
6554       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6555       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6556       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6557       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6558     } else {
6559       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6560       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6561       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6562       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6563     }
6564   } else if (ptype == MATPRODUCT_PtAP) {
6565     if (product->api_user) {
6566       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6567       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6568       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6569     } else {
6570       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6571       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6572       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6573     }
6574   }
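  /* Example (sketch): the options above can be selected at run time, e.g.
     "-matmatmult_backend_mergeB" when the product comes from MatMatMult(), or
     "-matproduct_ab_backend_mergeB" when it is configured through MatProductCreate() */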
6575   a = (Mat_MPIAIJ*)A->data;
6576   p = (Mat_MPIAIJ*)P->data;
6577   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6578   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6579   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6580   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6581   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6582   switch (ptype) {
6583   case MATPRODUCT_AB: /* A * P */
6584     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6585 
6586     if (mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */
6587       /* P is product->B */
6588       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6589       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6590       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6591       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6592       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6593       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6594       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6595       mp[cp]->product->api_user = product->api_user;
6596       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6597       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6598       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6599       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6600       rmapt[cp] = 1;
6601       cmapt[cp] = 2;
6602       cmapa[cp] = globidx;
6603       mptmp[cp] = PETSC_FALSE;
6604       cp++;
6605     } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */
6606       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6607       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6608       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6609       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6610       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6611       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6612       mp[cp]->product->api_user = product->api_user;
6613       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6614       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6615       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6616       rmapt[cp] = 1;
6617       cmapt[cp] = 1;
6618       mptmp[cp] = PETSC_FALSE;
6619       cp++;
6620       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6621       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6622       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6623       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6624       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6625       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6626       mp[cp]->product->api_user = product->api_user;
6627       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6628       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6629       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6630       rmapt[cp] = 1;
6631       cmapt[cp] = 2;
6632       cmapa[cp] = p->garray;
6633       mptmp[cp] = PETSC_FALSE;
6634       cp++;
6635     }
6636     if (mmdata->P_oth) {
6637       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6638       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6639       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6640       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6641       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6642       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6643       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6644       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6645       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6646       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6647       mp[cp]->product->api_user = product->api_user;
6648       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6649       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6650       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6651       rmapt[cp] = 1;
6652       cmapt[cp] = 2;
6653       cmapa[cp] = P_oth_idx;
6654       mptmp[cp] = PETSC_FALSE;
6655       cp++;
6656     }
6657     break;
6658   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
6659     /* A is product->B */
6660     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6661     if (A == P) {
6662       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6663       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6664       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6665       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6666       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6667       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6668       mp[cp]->product->api_user = product->api_user;
6669       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6670       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6671       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6672       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6673       rmapt[cp] = 2;
6674       rmapa[cp] = globidx;
6675       cmapt[cp] = 2;
6676       cmapa[cp] = globidx;
6677       mptmp[cp] = PETSC_FALSE;
6678       cp++;
6679     } else {
6680       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6681       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6682       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6683       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6684       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6685       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6686       mp[cp]->product->api_user = product->api_user;
6687       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6688       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6689       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6690       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6691       rmapt[cp] = 1;
6692       cmapt[cp] = 2;
6693       cmapa[cp] = globidx;
6694       mptmp[cp] = PETSC_FALSE;
6695       cp++;
6696       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6697       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6698       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6699       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6700       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6701       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6702       mp[cp]->product->api_user = product->api_user;
6703       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6704       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6705       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6706       rmapt[cp] = 2;
6707       rmapa[cp] = p->garray;
6708       cmapt[cp] = 2;
6709       cmapa[cp] = globidx;
6710       mptmp[cp] = PETSC_FALSE;
6711       cp++;
6712     }
6713     break;
6714   case MATPRODUCT_PtAP:
6715     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6716     /* P is product->B */
6717     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6718     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6719     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6720     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6721     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6722     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6723     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6724     mp[cp]->product->api_user = product->api_user;
6725     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6726     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6727     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6728     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6729     rmapt[cp] = 2;
6730     rmapa[cp] = globidx;
6731     cmapt[cp] = 2;
6732     cmapa[cp] = globidx;
6733     mptmp[cp] = PETSC_FALSE;
6734     cp++;
6735     if (mmdata->P_oth) {
6736       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6737       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6738       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6739       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6740       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6741       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6742       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6743       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6744       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6745       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6746       mp[cp]->product->api_user = product->api_user;
6747       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6748       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6749       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6750       mptmp[cp] = PETSC_TRUE;
6751       cp++;
6752       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6753       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6754       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6755       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6756       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6757       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6758       mp[cp]->product->api_user = product->api_user;
6759       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6760       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6761       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6762       rmapt[cp] = 2;
6763       rmapa[cp] = globidx;
6764       cmapt[cp] = 2;
6765       cmapa[cp] = P_oth_idx;
6766       mptmp[cp] = PETSC_FALSE;
6767       cp++;
6768     }
6769     break;
6770   default:
6771     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6772   }
6773   /* sanity check */
6774   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6775 
6776   ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr);
6777   for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i];
6778   ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr);
6779   for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i];
6780   mmdata->cp = cp;
6781   C->product->data       = mmdata;
6782   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6783   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6784 
6785   /* memory type */
6786   mmdata->mtype = PETSC_MEMTYPE_HOST;
6787   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6788   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6789   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6790   // enable the line below once MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6791   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6792 
6793   /* prepare coo coordinates for values insertion */
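  /* ncoo_d:    entries whose destination row can be computed directly on this process,
     ncoo_oown: entries of indirectly mapped products (rmapt[cp] == 2) whose global row is owned locally,
     ncoo_o:    entries whose global row belongs to another process and travel through the SF */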
6794   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6795     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6796     if (mptmp[cp]) continue;
6797     if (rmapt[cp] == 2 && hasoffproc) {
6798       const PetscInt *rmap = rmapa[cp];
6799       const PetscInt mr = mp[cp]->rmap->n;
6800       const PetscInt rs = C->rmap->rstart;
6801       const PetscInt re = C->rmap->rend;
6802       const PetscInt *ii  = mm->i;
6803       for (i = 0; i < mr; i++) {
6804         const PetscInt gr = rmap[i];
6805         const PetscInt nz = ii[i+1] - ii[i];
6806         if (gr < rs || gr >= re) ncoo_o += nz;
6807         else ncoo_oown += nz;
6808       }
6809     } else ncoo_d += mm->nz;
6810   }
6811   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr);
6812   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6813   if (hasoffproc) { /* handle off-process values insertion */
6814     PetscSF  msf;
6815     PetscInt ncoo2,*coo_i2,*coo_j2;
6816 
6817     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6818     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6819     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr);
6820     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6821       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6822       PetscInt   *idxoff = mmdata->off[cp];
6823       PetscInt   *idxown = mmdata->own[cp];
6824       if (!mptmp[cp] && rmapt[cp] == 2) {
6825         const PetscInt *rmap = rmapa[cp];
6826         const PetscInt *cmap = cmapa[cp];
6827         const PetscInt *ii  = mm->i;
6828         PetscInt       *coi = coo_i + ncoo_o;
6829         PetscInt       *coj = coo_j + ncoo_o;
6830         const PetscInt mr = mp[cp]->rmap->n;
6831         const PetscInt rs = C->rmap->rstart;
6832         const PetscInt re = C->rmap->rend;
6833         const PetscInt cs = C->cmap->rstart;
6834         for (i = 0; i < mr; i++) {
6835           const PetscInt *jj = mm->j + ii[i];
6836           const PetscInt gr  = rmap[i];
6837           const PetscInt nz  = ii[i+1] - ii[i];
6838           if (gr < rs || gr >= re) {
6839             for (j = ii[i]; j < ii[i+1]; j++) {
6840               *coi++ = gr;
6841               *idxoff++ = j;
6842             }
6843             if (!cmapt[cp]) { /* already global */
6844               for (j = 0; j < nz; j++) *coj++ = jj[j];
6845             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6846               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6847             } else { /* offdiag */
6848               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6849             }
6850             ncoo_o += nz;
6851           } else {
6852             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6853           }
6854         }
6855       }
6856       mmdata->off[cp + 1] = idxoff;
6857       mmdata->own[cp + 1] = idxown;
6858     }
6859 
6860     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6861     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6862     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6863     ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr);
6864     ncoo = ncoo_d + ncoo_oown + ncoo2;
6865     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6866     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6867     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6868     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6869     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6870     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6871     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6872     coo_i = coo_i2;
6873     coo_j = coo_j2;
6874   } else { /* no off-process values insertion; still create an empty SF so PetscSFMalloc() below has a memory-type reference */
6875     ncoo = ncoo_d;
6876     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6877 
6878     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6879     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6880     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6881   }
6882   mmdata->hasoffproc = hasoffproc;
6883 
6884   /* on-process indices */
6885   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6886     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6887     PetscInt       *coi = coo_i + ncoo_d;
6888     PetscInt       *coj = coo_j + ncoo_d;
6889     const PetscInt *jj  = mm->j;
6890     const PetscInt *ii  = mm->i;
6891     const PetscInt *cmap = cmapa[cp];
6892     const PetscInt *rmap = rmapa[cp];
6893     const PetscInt mr = mp[cp]->rmap->n;
6894     const PetscInt rs = C->rmap->rstart;
6895     const PetscInt re = C->rmap->rend;
6896     const PetscInt cs = C->cmap->rstart;
6897 
6898     if (mptmp[cp]) continue;
6899     if (rmapt[cp] == 1) {
6900       for (i = 0; i < mr; i++) {
6901         const PetscInt gr = i + rs;
6902         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6903       }
6904       /* columns coo */
6905       if (!cmapt[cp]) {
6906         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6907       } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6908         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs;
6909       } else { /* offdiag */
6910         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6911       }
6912       ncoo_d += mm->nz;
6913     } else if (rmapt[cp] == 2) {
6914       for (i = 0; i < mr; i++) {
6915         const PetscInt *jj = mm->j + ii[i];
6916         const PetscInt gr  = rmap[i];
6917         const PetscInt nz  = ii[i+1] - ii[i];
6918         if (gr >= rs && gr < re) {
6919           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6920           if (!cmapt[cp]) { /* already global */
6921             for (j = 0; j < nz; j++) *coj++ = jj[j];
6922           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6923             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6924           } else { /* offdiag */
6925             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6926           }
6927           ncoo_d += nz;
6928         }
6929       }
6930     }
6931   }
6932   if (glob) {
6933     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6934   }
6935   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6936   if (P_oth_l2g) {
6937     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6938   }
6939   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6940   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6941 
6942   /* preallocate with COO data */
6943   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6944   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6945   PetscFunctionReturn(0);
6946 }
6947 
6948 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6949 {
6950   Mat_Product    *product = mat->product;
6951   PetscErrorCode ierr;
6952 #if defined(PETSC_HAVE_DEVICE)
6953   PetscBool      match = PETSC_FALSE;
6954   PetscBool      usecpu = PETSC_FALSE;
6955 #else
6956   PetscBool      match = PETSC_TRUE;
6957 #endif
6958 
6959   PetscFunctionBegin;
6960   MatCheckProduct(mat,1);
6961 #if defined(PETSC_HAVE_DEVICE)
6962   if (!product->A->boundtocpu && !product->B->boundtocpu) {
6963     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
6964   }
6965   if (match) { /* we can always fallback to the CPU if requested */
6966     switch (product->type) {
6967     case MATPRODUCT_AB:
6968       if (product->api_user) {
6969         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6970         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6971         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6972       } else {
6973         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6974         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6975         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6976       }
6977       break;
6978     case MATPRODUCT_AtB:
6979       if (product->api_user) {
6980         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
6981         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6982         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6983       } else {
6984         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
6985         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6986         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6987       }
6988       break;
6989     case MATPRODUCT_PtAP:
6990       if (product->api_user) {
6991         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6992         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6993         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6994       } else {
6995         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6996         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6997         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6998       }
6999       break;
7000     default:
7001       break;
7002     }
7003     match = (PetscBool)!usecpu;
7004   }
7005 #endif
7006   if (match) {
7007     switch (product->type) {
7008     case MATPRODUCT_AB:
7009     case MATPRODUCT_AtB:
7010     case MATPRODUCT_PtAP:
7011       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7012       break;
7013     default:
7014       break;
7015     }
7016   }
7017   /* fallback to MPIAIJ ops */
7018   if (!mat->ops->productsymbolic) {
7019     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7020   }
7021   PetscFunctionReturn(0);
7022 }
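
/* Example (sketch): the device backend can be bypassed at run time, e.g. with
   "-matptap_backend_cpu" when calling MatPtAP(), or "-matproduct_ptap_backend_cpu" when the
   product is configured through MatProductCreate()/MatProductSetFromOptions() */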
7023