xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5a884c48ab0c46bab83cd9bb8710f380fa6d8bcf)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because those are what are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Level: beginner
147 
148   Developer Note:
149   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
150   enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
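
/*
   Example usage for MATAIJ (a minimal illustrative sketch, not part of this source; "comm", "mlocal",
   "nlocal" and the preallocation counts are placeholder values). Following the recommendation above, both
   preallocation routines are called; the one that does not match the communicator size is ignored.

     Mat A;
     PetscCall(MatCreate(comm, &A));
     PetscCall(MatSetSizes(A, mlocal, nlocal, PETSC_DETERMINE, PETSC_DETERMINE));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
     ...  MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use of A  ...
     PetscCall(MatDestroy(&A));
*/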
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
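
/*
   Example usage for MATAIJCRL (an illustrative sketch): the CRL variant is usually selected at runtime with
   -mat_type aijcrl after MatSetFromOptions(), or an existing AIJ matrix "A" can be converted in place:

     PetscCall(MatConvert(A, MATAIJCRL, MAT_INPLACE_MATRIX, &A));
*/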
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from that of the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
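
/*
   The reductions computed above back public column-reduction routines such as MatGetColumnNorms(); a minimal
   usage sketch (assuming "A" is an assembled matrix with N global columns):

     PetscReal *norms;
     PetscInt   N;
     PetscCall(MatGetSize(A, NULL, &N));
     PetscCall(PetscMalloc1(N, &norms));
     PetscCall(MatGetColumnNorms(A, NORM_2, norms));
     PetscCall(PetscFree(norms));
*/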
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
382 a slightly higher hash table cost; without it, it is not scalable (each process
383 has an order-N integer array) but access is fast.
384 */
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
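
/*
   Sketch of how the colmap built above is typically consulted (this mirrors the lookups later in this file,
   e.g. in MatSetValues_MPIAIJ(); "gcol" is a placeholder global column index). Entries are stored shifted by
   one so that 0 can mean "not present"; after the lookup, lcol < 0 means gcol is not an off-diagonal column
   on this process.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/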
401 
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure whether PetscLogFlops() will slow down the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
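
/*
   Worked example for MatSetValuesRow_MPIAIJ() above (illustrative numbers only): on a process owning global
   columns [4,8) of a square matrix, a row with nonzeros in global columns {1, 5, 7, 11} must pass its values
   in ascending global-column order, v = {v(1), v(5), v(7), v(11)}; the l = 1 entry left of the diagonal block
   goes into B first, then the two diagonal-block entries into A, then the remaining off-diagonal entry into B.
*/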
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               PetscCheck(1 == ((Mat_SeqAIJ *)aij->B->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
613               PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
618         }
619       }
620     } else {
621       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
626         } else {
627           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
628         }
629       }
630     }
631   }
632   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
633   PetscCall(MatSeqAIJRestoreArray(B, &ba));
634   PetscFunctionReturn(PETSC_SUCCESS);
635 }
636 
637 /*
638     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
639     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
640     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
641 */
642 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
643 {
644   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
645   Mat         A      = aij->A; /* diagonal part of the matrix */
646   Mat         B      = aij->B; /* off-diagonal part of the matrix */
647   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
648   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
649   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
650   PetscInt   *ailen = a->ilen, *aj = a->j;
651   PetscInt   *bilen = b->ilen, *bj = b->j;
652   PetscInt    am          = aij->A->rmap->n, j;
653   PetscInt    diag_so_far = 0, dnz;
654   PetscInt    offd_so_far = 0, onz;
655 
656   PetscFunctionBegin;
657   /* Iterate over all rows of the matrix */
658   for (j = 0; j < am; j++) {
659     dnz = onz = 0;
660     /*  Iterate over all non-zero columns of the current row */
661     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
662       /* If column is in the diagonal */
663       if (mat_j[col] >= cstart && mat_j[col] < cend) {
664         aj[diag_so_far++] = mat_j[col] - cstart;
665         dnz++;
666       } else { /* off-diagonal entries */
667         bj[offd_so_far++] = mat_j[col];
668         onz++;
669       }
670     }
671     ailen[j] = dnz;
672     bilen[j] = onz;
673   }
674   PetscFunctionReturn(PETSC_SUCCESS);
675 }
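
/*
   Worked example for the split performed above (illustrative numbers only): with cstart = 4 and cend = 8, a
   local row with mat_j entries {1, 4, 6, 9} is split into diagonal-part columns {0, 2} (shifted by cstart)
   and off-diagonal-part columns {1, 9}, giving ailen[row] = 2 and bilen[row] = 2.
*/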
676 
677 /*
678     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
679     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
680     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
681     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
682     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
683 */
684 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
685 {
686   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
687   Mat          A    = aij->A; /* diagonal part of the matrix */
688   Mat          B    = aij->B; /* off-diagonal part of the matrix */
689   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
690   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
691   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
692   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
693   PetscInt    *ailen = a->ilen, *aj = a->j;
694   PetscInt    *bilen = b->ilen, *bj = b->j;
695   PetscInt     am          = aij->A->rmap->n, j;
696   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
697   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
698   PetscScalar *aa = a->a, *ba = b->a;
699 
700   PetscFunctionBegin;
701   /* Iterate over all rows of the matrix */
702   for (j = 0; j < am; j++) {
703     dnz_row = onz_row = 0;
704     rowstart_offd     = full_offd_i[j];
705     rowstart_diag     = full_diag_i[j];
706     /*  Iterate over all non-zero columns of the current row */
707     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
708       /* If column is in the diagonal */
709       if (mat_j[col] >= cstart && mat_j[col] < cend) {
710         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
711         aa[rowstart_diag + dnz_row] = mat_a[col];
712         dnz_row++;
713       } else { /* off-diagonal entries */
714         bj[rowstart_offd + onz_row] = mat_j[col];
715         ba[rowstart_offd + onz_row] = mat_a[col];
716         onz_row++;
717       }
718     }
719     ailen[j] = dnz_row;
720     bilen[j] = onz_row;
721   }
722   PetscFunctionReturn(PETSC_SUCCESS);
723 }
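
/*
   Continuing the worked example from the symbolic routine above (illustrative numbers only): with cstart = 4,
   cend = 8, mat_j = {1, 4, 6, 9} and mat_a = {10, 20, 30, 40} for a row, the values {20, 30} are copied into
   this row's slots of the diagonal part and {10, 40} into this row's slots of the off-diagonal part, with
   ailen[row] = bilen[row] = 2.
*/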
724 
725 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
728   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
729   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
730 
731   PetscFunctionBegin;
732   for (i = 0; i < m; i++) {
733     if (idxm[i] < 0) continue; /* negative row */
734     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
735     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
736     row = idxm[i] - rstart;
737     for (j = 0; j < n; j++) {
738       if (idxn[j] < 0) continue; /* negative column */
739       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
740       if (idxn[j] >= cstart && idxn[j] < cend) {
741         col = idxn[j] - cstart;
742         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
743       } else {
744         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
745 #if defined(PETSC_USE_CTABLE)
746         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
747         col--;
748 #else
749         col = aij->colmap[idxn[j]] - 1;
750 #endif
751         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
752         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
753       }
754     }
755   }
756   PetscFunctionReturn(PETSC_SUCCESS);
757 }
758 
759 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
760 {
761   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
762   PetscInt    nstash, reallocs;
763 
764   PetscFunctionBegin;
765   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
766 
767   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
768   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
769   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
774 {
775   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
776   PetscMPIInt  n;
777   PetscInt     i, j, rstart, ncols, flg;
778   PetscInt    *row, *col;
779   PetscBool    all_assembled;
780   PetscScalar *val;
781 
782   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
783 
784   PetscFunctionBegin;
785   if (!aij->donotstash && !mat->nooffprocentries) {
786     while (1) {
787       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
788       if (!flg) break;
789 
790       for (i = 0; i < n;) {
791         /* Now identify the consecutive vals belonging to the same row */
792         for (j = i, rstart = row[j]; j < n; j++) {
793           if (row[j] != rstart) break;
794         }
795         if (j < n) ncols = j - i;
796         else ncols = n - i;
797         /* Now assemble all these values with a single function call */
798         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
799         i = j;
800       }
801     }
802     PetscCall(MatStashScatterEnd_Private(&mat->stash));
803   }
804 #if defined(PETSC_HAVE_DEVICE)
805   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
806   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
807   if (mat->boundtocpu) {
808     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
809     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
810   }
811 #endif
812   PetscCall(MatAssemblyBegin(aij->A, mode));
813   PetscCall(MatAssemblyEnd(aij->A, mode));
814 
815   /* determine whether any process has disassembled; if so, we must
816      also disassemble ourselves so that we may reassemble. */
817   /*
818      if the nonzero structure of submatrix B cannot change, then we know that
819      no process disassembled and thus we can skip this step
820   */
821   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
822     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
823     if (mat->was_assembled && !all_assembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
824       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
825     }
826   }
827   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
828   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
829 #if defined(PETSC_HAVE_DEVICE)
830   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
831 #endif
832   PetscCall(MatAssemblyBegin(aij->B, mode));
833   PetscCall(MatAssemblyEnd(aij->B, mode));
834 
835   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
836 
837   aij->rowvalues = NULL;
838 
839   PetscCall(VecDestroy(&aij->diag));
840 
841   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
842   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
843     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
844     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
845   }
846 #if defined(PETSC_HAVE_DEVICE)
847   mat->offloadmask = PETSC_OFFLOAD_BOTH;
848 #endif
849   PetscFunctionReturn(PETSC_SUCCESS);
850 }
851 
852 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
853 {
854   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
855 
856   PetscFunctionBegin;
857   PetscCall(MatZeroEntries(l->A));
858   PetscCall(MatZeroEntries(l->B));
859   PetscFunctionReturn(PETSC_SUCCESS);
860 }
861 
862 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
863 {
864   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
865   PetscInt   *lrows;
866   PetscInt    r, len;
867   PetscBool   cong;
868 
869   PetscFunctionBegin;
870   /* get locally owned rows */
871   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
872   PetscCall(MatHasCongruentLayouts(A, &cong));
873   /* fix right-hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
879     PetscCall(VecGetArrayRead(x, &xx));
880     PetscCall(VecGetArray(b, &bb));
881     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
882     PetscCall(VecRestoreArrayRead(x, &xx));
883     PetscCall(VecRestoreArray(b, &bb));
884   }
885 
886   if (diag != 0.0 && cong) {
887     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
888     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
889   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
890     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
891     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
892     PetscInt    nnwA, nnwB;
893     PetscBool   nnzA, nnzB;
894 
895     nnwA = aijA->nonew;
896     nnwB = aijB->nonew;
897     nnzA = aijA->keepnonzeropattern;
898     nnzB = aijB->keepnonzeropattern;
899     if (!nnzA) {
900       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
901       aijA->nonew = 0;
902     }
903     if (!nnzB) {
904       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
905       aijB->nonew = 0;
906     }
907     /* Must zero here before the next loop */
908     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
909     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
910     for (r = 0; r < len; ++r) {
911       const PetscInt row = lrows[r] + A->rmap->rstart;
912       if (row >= A->cmap->N) continue;
913       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
914     }
915     aijA->nonew = nnwA;
916     aijB->nonew = nnwB;
917   } else {
918     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
919     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
920   }
921   PetscCall(PetscFree(lrows));
922   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
923   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
927     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
928     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
929   }
930   PetscFunctionReturn(PETSC_SUCCESS);
931 }
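
/*
   Example use of the public entry point implemented above (a minimal sketch; "A", "x", "b" and the row
   indices are placeholders). Rows are specified by global index; when x and b are provided, b is adjusted so
   that the zeroed rows satisfy diag * x[row] = b[row]:

     const PetscInt rows[] = {0, 7};
     PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b));
*/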
932 
933 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
934 {
935   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
936   PetscInt           n = A->rmap->n;
937   PetscInt           i, j, r, m, len = 0;
938   PetscInt          *lrows, *owners = A->rmap->range;
939   PetscMPIInt        p = 0;
940   PetscSFNode       *rrows;
941   PetscSF            sf;
942   const PetscScalar *xx;
943   PetscScalar       *bb, *mask, *aij_a;
944   Vec                xmask, lmask;
945   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
946   const PetscInt    *aj, *ii, *ridx;
947   PetscScalar       *aa;
948 
949   PetscFunctionBegin;
950   /* Create SF where leaves are input rows and roots are owned rows */
951   PetscCall(PetscMalloc1(n, &lrows));
952   for (r = 0; r < n; ++r) lrows[r] = -1;
953   PetscCall(PetscMalloc1(N, &rrows));
954   for (r = 0; r < N; ++r) {
955     const PetscInt idx = rows[r];
956     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
957     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
958       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
959     }
960     rrows[r].rank  = p;
961     rrows[r].index = rows[r] - owners[p];
962   }
963   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
964   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
965   /* Collect flags for rows to be zeroed */
966   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
967   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFDestroy(&sf));
969   /* Compress and put in row numbers */
970   for (r = 0; r < n; ++r)
971     if (lrows[r] >= 0) lrows[len++] = r;
972   /* zero diagonal part of matrix */
973   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
974   /* handle off-diagonal part of matrix */
975   PetscCall(MatCreateVecs(A, &xmask, NULL));
976   PetscCall(VecDuplicate(l->lvec, &lmask));
977   PetscCall(VecGetArray(xmask, &bb));
978   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
979   PetscCall(VecRestoreArray(xmask, &bb));
980   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
981   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecDestroy(&xmask));
983   if (x && b) { /* this code is buggy when the row and column layout don't match */
984     PetscBool cong;
985 
986     PetscCall(MatHasCongruentLayouts(A, &cong));
987     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
988     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
989     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecGetArrayRead(l->lvec, &xx));
991     PetscCall(VecGetArray(b, &bb));
992   }
993   PetscCall(VecGetArray(lmask, &mask));
994   /* remove zeroed rows of off-diagonal matrix */
995   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
996   ii = aij->i;
997   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
998   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
999   if (aij->compressedrow.use) {
1000     m    = aij->compressedrow.nrows;
1001     ii   = aij->compressedrow.i;
1002     ridx = aij->compressedrow.rindex;
1003     for (i = 0; i < m; i++) {
1004       n  = ii[i + 1] - ii[i];
1005       aj = aij->j + ii[i];
1006       aa = aij_a + ii[i];
1007 
1008       for (j = 0; j < n; j++) {
1009         if (PetscAbsScalar(mask[*aj])) {
1010           if (b) bb[*ridx] -= *aa * xx[*aj];
1011           *aa = 0.0;
1012         }
1013         aa++;
1014         aj++;
1015       }
1016       ridx++;
1017     }
1018   } else { /* do not use compressed row format */
1019     m = l->B->rmap->n;
1020     for (i = 0; i < m; i++) {
1021       n  = ii[i + 1] - ii[i];
1022       aj = aij->j + ii[i];
1023       aa = aij_a + ii[i];
1024       for (j = 0; j < n; j++) {
1025         if (PetscAbsScalar(mask[*aj])) {
1026           if (b) bb[i] -= *aa * xx[*aj];
1027           *aa = 0.0;
1028         }
1029         aa++;
1030         aj++;
1031       }
1032     }
1033   }
1034   if (x && b) {
1035     PetscCall(VecRestoreArray(b, &bb));
1036     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1037   }
1038   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1039   PetscCall(VecRestoreArray(lmask, &mask));
1040   PetscCall(VecDestroy(&lmask));
1041   PetscCall(PetscFree(lrows));
1042 
1043   /* only change matrix nonzero state if pattern was allowed to be changed */
1044   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1045     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1046     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1047   }
1048   PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050 
1051 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1052 {
1053   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1054   PetscInt    nt;
1055   VecScatter  Mvctx = a->Mvctx;
1056 
1057   PetscFunctionBegin;
1058   PetscCall(VecGetLocalSize(xx, &nt));
1059   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1060   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1061   PetscUseTypeMethod(a->A, mult, xx, yy);
1062   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1063   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
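
/*
   The multiply above computes y = A_d * x_local + A_o * x_ghost: a->A (A_d) is the diagonal block acting on
   the locally owned entries of x, while a->B (A_o) is the off-diagonal block acting on ghost values gathered
   into a->lvec by the Mvctx scatter. The scatter is begun before the local product so that communication can
   overlap with computation.
*/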
1066 
1067 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1068 {
1069   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1070 
1071   PetscFunctionBegin;
1072   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1073   PetscFunctionReturn(PETSC_SUCCESS);
1074 }
1075 
1076 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1077 {
1078   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1079   VecScatter  Mvctx = a->Mvctx;
1080 
1081   PetscFunctionBegin;
1082   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1083   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1084   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1085   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1092 
1093   PetscFunctionBegin;
1094   /* do nondiagonal part */
1095   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1096   /* do local part */
1097   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1098   /* add partial results together */
1099   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1100   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscFunctionReturn(PETSC_SUCCESS);
1102 }
1103 
1104 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1105 {
1106   MPI_Comm    comm;
1107   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1108   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1109   IS          Me, Notme;
1110   PetscInt    M, N, first, last, *notme, i;
1111   PetscBool   lf;
1112   PetscMPIInt size;
1113 
1114   PetscFunctionBegin;
1115   /* Easy test: symmetric diagonal block */
1116   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1117   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1118   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1119   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1120   PetscCallMPI(MPI_Comm_size(comm, &size));
1121   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1122 
1123   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1124   PetscCall(MatGetSize(Amat, &M, &N));
1125   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1126   PetscCall(PetscMalloc1(N - last + first, &notme));
1127   for (i = 0; i < first; i++) notme[i] = i;
1128   for (i = last; i < M; i++) notme[i - last + first] = i;
1129   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1130   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1131   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1132   Aoff = Aoffs[0];
1133   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1134   Boff = Boffs[0];
1135   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1136   PetscCall(MatDestroyMatrices(1, &Aoffs));
1137   PetscCall(MatDestroyMatrices(1, &Boffs));
1138   PetscCall(ISDestroy(&Me));
1139   PetscCall(ISDestroy(&Notme));
1140   PetscCall(PetscFree(notme));
1141   PetscFunctionReturn(PETSC_SUCCESS);
1142 }
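
/*
   Example use of the public entry point implemented above (a minimal sketch; "A" is a placeholder MPIAIJ
   matrix). Comparing a matrix against its explicit transpose should report PETSC_TRUE:

     Mat       At;
     PetscBool flg;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
     PetscCall(MatIsTranspose(A, At, 0.0, &flg));
     PetscCall(MatDestroy(&At));
*/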
1143 
1144 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   /* do nondiagonal part */
1150   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1151   /* do local part */
1152   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1153   /* add partial results together */
1154   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1155   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158 
1159 /*
1160   This only works correctly for square matrices where the subblock A->A is the
1161    diagonal block
1162 */
1163 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1164 {
1165   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1166 
1167   PetscFunctionBegin;
1168   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1169   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1170   PetscCall(MatGetDiagonal(a->A, v));
1171   PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173 
1174 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1175 {
1176   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatScale(a->A, aa));
1180   PetscCall(MatScale(a->B, aa));
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1185 {
1186   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1187   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1188   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1189   const PetscInt    *garray = aij->garray;
1190   const PetscScalar *aa, *ba;
1191   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1192   PetscInt64         nz, hnz;
1193   PetscInt          *rowlens;
1194   PetscInt          *colidxs;
1195   PetscScalar       *matvals;
1196   PetscMPIInt        rank;
1197 
1198   PetscFunctionBegin;
1199   PetscCall(PetscViewerSetUp(viewer));
1200 
1201   M  = mat->rmap->N;
1202   N  = mat->cmap->N;
1203   m  = mat->rmap->n;
1204   rs = mat->rmap->rstart;
1205   cs = mat->cmap->rstart;
1206   nz = A->nz + B->nz;
1207 
1208   /* write matrix header */
1209   header[0] = MAT_FILE_CLASSID;
1210   header[1] = M;
1211   header[2] = N;
1212   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1213   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1214   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1215   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1216 
1217   /* fill in and store row lengths  */
1218   PetscCall(PetscMalloc1(m, &rowlens));
1219   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1220   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1221   PetscCall(PetscFree(rowlens));
1222 
1223   /* fill in and store column indices */
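  /* for each row the global column indices are emitted in increasing order: off-diagonal
     entries with global column < cs first, then the diagonal block shifted by cs, then the
     remaining off-diagonal entries; this relies on A->j and B->j being sorted per row */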
1224   PetscCall(PetscMalloc1(nz, &colidxs));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       colidxs[cnt++] = garray[B->j[jb]];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1231     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1232   }
1233   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1235   PetscCall(PetscFree(colidxs));
1236 
1237   /* fill in and store nonzero values */
1238   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1239   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1240   PetscCall(PetscMalloc1(nz, &matvals));
1241   for (cnt = 0, i = 0; i < m; i++) {
1242     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1243       if (garray[B->j[jb]] > cs) break;
1244       matvals[cnt++] = ba[jb];
1245     }
1246     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1247     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1248   }
1249   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1251   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1252   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1253   PetscCall(PetscFree(matvals));
1254 
1255   /* write block size option to the viewer's .info file */
1256   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1257   PetscFunctionReturn(PETSC_SUCCESS);
1258 }
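
/*
  Resulting binary layout written above (informal sketch; sizes in numbers of entries):

    header  : MAT_FILE_CLASSID, M, N, global nnz            (4 PetscInt)
    rowlens : number of nonzeros of every global row        (M PetscInt)
    colidxs : global column indices, row by row             (nnz PetscInt)
    vals    : nonzero values in the same order as colidxs   (nnz PetscScalar)
*/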
1259 
1260 #include <petscdraw.h>
1261 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1264   PetscMPIInt       rank = aij->rank, size = aij->size;
1265   PetscBool         isdraw, isascii, isbinary;
1266   PetscViewer       sviewer;
1267   PetscViewerFormat format;
1268 
1269   PetscFunctionBegin;
1270   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1273   if (isascii) {
1274     PetscCall(PetscViewerGetFormat(viewer, &format));
1275     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1276       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1277       PetscCall(PetscMalloc1(size, &nz));
1278       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1279       for (i = 0; i < size; i++) {
1280         nmax = PetscMax(nmax, nz[i]);
1281         nmin = PetscMin(nmin, nz[i]);
1282         navg += nz[i];
1283       }
1284       PetscCall(PetscFree(nz));
1285       navg = navg / size;
1286       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1287       PetscFunctionReturn(PETSC_SUCCESS);
1288     }
1289     PetscCall(PetscViewerGetFormat(viewer, &format));
1290     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1291       MatInfo   info;
1292       PetscInt *inodes = NULL;
1293 
1294       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1295       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1296       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1297       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1298       if (!inodes) {
1299         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1300                                                      info.memory));
1301       } else {
1302         PetscCall(
1303           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1304       }
1305       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1306       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1307       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1308       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1309       PetscCall(PetscViewerFlush(viewer));
1310       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1311       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1312       PetscCall(VecScatterView(aij->Mvctx, viewer));
1313       PetscFunctionReturn(PETSC_SUCCESS);
1314     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1315       PetscInt inodecount, inodelimit, *inodes;
1316       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1317       if (inodes) {
1318         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1319       } else {
1320         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1321       }
1322       PetscFunctionReturn(PETSC_SUCCESS);
1323     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1324       PetscFunctionReturn(PETSC_SUCCESS);
1325     }
1326   } else if (isbinary) {
1327     if (size == 1) {
1328       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1329       PetscCall(MatView(aij->A, viewer));
1330     } else {
1331       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1332     }
1333     PetscFunctionReturn(PETSC_SUCCESS);
1334   } else if (isascii && size == 1) {
1335     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1336     PetscCall(MatView(aij->A, viewer));
1337     PetscFunctionReturn(PETSC_SUCCESS);
1338   } else if (isdraw) {
1339     PetscDraw draw;
1340     PetscBool isnull;
1341     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1342     PetscCall(PetscDrawIsNull(draw, &isnull));
1343     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1344   }
1345 
1346   { /* assemble the entire matrix onto first processor */
1347     Mat A = NULL, Av;
1348     IS  isrow, iscol;
1349 
1350     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1352     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1353     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1354     /*  The commented code uses MatCreateSubMatrices instead */
1355     /*
1356     Mat *AA, A = NULL, Av;
1357     IS  isrow,iscol;
1358 
1359     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1361     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1362     if (rank == 0) {
1363        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1364        A    = AA[0];
1365        Av   = AA[0];
1366     }
1367     PetscCall(MatDestroySubMatrices(1,&AA));
1368 */
1369     PetscCall(ISDestroy(&iscol));
1370     PetscCall(ISDestroy(&isrow));
1371     /*
1372        Everyone has to call to draw the matrix since the graphics waits are
1373        synchronized across all processors that share the PetscDraw object
1374     */
1375     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1376     if (rank == 0) {
1377       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1378       PetscCall(MatView_SeqAIJ(Av, sviewer));
1379     }
1380     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1381     PetscCall(MatDestroy(&A));
1382   }
1383   PetscFunctionReturn(PETSC_SUCCESS);
1384 }
1385 
1386 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1387 {
1388   PetscBool isascii, isdraw, issocket, isbinary;
1389 
1390   PetscFunctionBegin;
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1395   if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
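
/*
  A call sequence that typically ends up in the viewer code above (a minimal sketch; `mat`
  is any assembled MATMPIAIJ matrix):

    PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
    PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
    PetscCall(MatView(mat, viewer));
    PetscCall(PetscViewerPopFormat(viewer));
*/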
1398 
1399 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1400 {
1401   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1402   Vec         bb1 = NULL;
1403   PetscBool   hasop;
1404 
1405   PetscFunctionBegin;
1406   if (flag == SOR_APPLY_UPPER) {
1407     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1408     PetscFunctionReturn(PETSC_SUCCESS);
1409   }
1410 
1411   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1412 
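  /* Only processor-local relaxation is supported: in each parallel iteration the ghost values
     of xx are gathered into lvec, moved to the right-hand side (bb1 = bb - B*x), and a
     sequential SOR sweep is applied to the diagonal block; this amounts to block Jacobi
     across processes with SOR inside each block */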
1413   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1414     if (flag & SOR_ZERO_INITIAL_GUESS) {
1415       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1416       its--;
1417     }
1418 
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1447     if (flag & SOR_ZERO_INITIAL_GUESS) {
1448       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1449       its--;
1450     }
1451     while (its--) {
1452       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454 
1455       /* update rhs: bb1 = bb - B*x */
1456       PetscCall(VecScale(mat->lvec, -1.0));
1457       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1458 
1459       /* local sweep */
1460       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1461     }
1462   } else if (flag & SOR_EISENSTAT) {
1463     Vec xx1;
1464 
1465     PetscCall(VecDuplicate(bb, &xx1));
1466     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1467 
1468     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1469     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     if (!mat->diag) {
1471       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1472       PetscCall(MatGetDiagonal(matin, mat->diag));
1473     }
1474     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1475     if (hasop) {
1476       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1477     } else {
1478       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1479     }
1480     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1481 
1482     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1483 
1484     /* local sweep */
1485     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1486     PetscCall(VecAXPY(xx, 1.0, xx1));
1487     PetscCall(VecDestroy(&xx1));
1488   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1489 
1490   PetscCall(VecDestroy(&bb1));
1491 
1492   matin->factorerrortype = mat->A->factorerrortype;
1493   PetscFunctionReturn(PETSC_SUCCESS);
1494 }
1495 
1496 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1497 {
1498   Mat             aA, aB, Aperm;
1499   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1500   PetscScalar    *aa, *ba;
1501   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1502   PetscSF         rowsf, sf;
1503   IS              parcolp = NULL;
1504   PetscBool       done;
1505 
1506   PetscFunctionBegin;
1507   PetscCall(MatGetLocalSize(A, &m, &n));
1508   PetscCall(ISGetIndices(rowp, &rwant));
1509   PetscCall(ISGetIndices(colp, &cwant));
1510   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1511 
1512   /* Invert row permutation to find out where my rows should go */
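  /* rwant[i] is the old global row that lands in (new) local row i, so the star forest has one
     leaf per new row, rooted at that old row; reducing work[i] = rstart + i onto the roots
     inverts the permutation, leaving in rdest[] the destination global index of every locally
     owned row without ever gathering the permutation onto one process */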
1513   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1514   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1515   PetscCall(PetscSFSetFromOptions(rowsf));
1516   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1517   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1518   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519 
1520   /* Invert column permutation to find out where my columns should go */
1521   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1522   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1523   PetscCall(PetscSFSetFromOptions(sf));
1524   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1525   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1526   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFDestroy(&sf));
1528 
1529   PetscCall(ISRestoreIndices(rowp, &rwant));
1530   PetscCall(ISRestoreIndices(colp, &cwant));
1531   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1532 
1533   /* Find out where my gcols should go */
1534   PetscCall(MatGetSize(aB, NULL, &ng));
1535   PetscCall(PetscMalloc1(ng, &gcdest));
1536   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1537   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1538   PetscCall(PetscSFSetFromOptions(sf));
1539   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1540   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFDestroy(&sf));
1542 
1543   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1544   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1545   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1546   for (i = 0; i < m; i++) {
1547     PetscInt    row = rdest[i];
1548     PetscMPIInt rowner;
1549     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1550     for (j = ai[i]; j < ai[i + 1]; j++) {
1551       PetscInt    col = cdest[aj[j]];
1552       PetscMPIInt cowner;
1553       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1554       if (rowner == cowner) dnnz[i]++;
1555       else onnz[i]++;
1556     }
1557     for (j = bi[i]; j < bi[i + 1]; j++) {
1558       PetscInt    col = gcdest[bj[j]];
1559       PetscMPIInt cowner;
1560       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1561       if (rowner == cowner) dnnz[i]++;
1562       else onnz[i]++;
1563     }
1564   }
1565   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1566   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFDestroy(&rowsf));
1570 
1571   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1572   PetscCall(MatSeqAIJGetArray(aA, &aa));
1573   PetscCall(MatSeqAIJGetArray(aB, &ba));
1574   for (i = 0; i < m; i++) {
1575     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1576     PetscInt  j0, rowlen;
1577     rowlen = ai[i + 1] - ai[i];
1578     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen can exceed m, the length of the scratch arrays, so insert the row in batches of at most m entries */
1579       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1580       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1581     }
1582     rowlen = bi[i + 1] - bi[i];
1583     for (j0 = j = 0; j < rowlen; j0 = j) {
1584       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1585       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1586     }
1587   }
1588   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1589   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1591   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1592   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1593   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1594   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1595   PetscCall(PetscFree3(work, rdest, cdest));
1596   PetscCall(PetscFree(gcdest));
1597   if (parcolp) PetscCall(ISDestroy(&colp));
1598   *B = Aperm;
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
1601 
1602 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1603 {
1604   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1605 
1606   PetscFunctionBegin;
1607   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1608   if (ghosts) *ghosts = aij->garray;
1609   PetscFunctionReturn(PETSC_SUCCESS);
1610 }
1611 
1612 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1613 {
1614   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1615   Mat            A = mat->A, B = mat->B;
1616   PetscLogDouble isend[5], irecv[5];
1617 
1618   PetscFunctionBegin;
1619   info->block_size = 1.0;
1620   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1621 
1622   isend[0] = info->nz_used;
1623   isend[1] = info->nz_allocated;
1624   isend[2] = info->nz_unneeded;
1625   isend[3] = info->memory;
1626   isend[4] = info->mallocs;
1627 
1628   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1629 
1630   isend[0] += info->nz_used;
1631   isend[1] += info->nz_allocated;
1632   isend[2] += info->nz_unneeded;
1633   isend[3] += info->memory;
1634   isend[4] += info->mallocs;
1635   if (flag == MAT_LOCAL) {
1636     info->nz_used      = isend[0];
1637     info->nz_allocated = isend[1];
1638     info->nz_unneeded  = isend[2];
1639     info->memory       = isend[3];
1640     info->mallocs      = isend[4];
1641   } else if (flag == MAT_GLOBAL_MAX) {
1642     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1643 
1644     info->nz_used      = irecv[0];
1645     info->nz_allocated = irecv[1];
1646     info->nz_unneeded  = irecv[2];
1647     info->memory       = irecv[3];
1648     info->mallocs      = irecv[4];
1649   } else if (flag == MAT_GLOBAL_SUM) {
1650     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1651 
1652     info->nz_used      = irecv[0];
1653     info->nz_allocated = irecv[1];
1654     info->nz_unneeded  = irecv[2];
1655     info->memory       = irecv[3];
1656     info->mallocs      = irecv[4];
1657   }
1658   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1659   info->fill_ratio_needed = 0;
1660   info->factor_mallocs    = 0;
1661   PetscFunctionReturn(PETSC_SUCCESS);
1662 }
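
/*
  Usage sketch for the reductions above (illustrative only; `mat` is an assembled parallel
  matrix):

    MatInfo info;
    PetscCall(MatGetInfo(mat, MAT_GLOBAL_SUM, &info));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nz used %g  nz allocated %g  mallocs %g\n",
                          info.nz_used, info.nz_allocated, info.mallocs));
*/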
1663 
1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1665 {
1666   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1667 
1668   PetscFunctionBegin;
1669   switch (op) {
1670   case MAT_NEW_NONZERO_LOCATIONS:
1671   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1672   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1673   case MAT_KEEP_NONZERO_PATTERN:
1674   case MAT_NEW_NONZERO_LOCATION_ERR:
1675   case MAT_USE_INODES:
1676   case MAT_IGNORE_ZERO_ENTRIES:
1677   case MAT_FORM_EXPLICIT_TRANSPOSE:
1678     MatCheckPreallocated(A, 1);
1679     PetscCall(MatSetOption(a->A, op, flg));
1680     PetscCall(MatSetOption(a->B, op, flg));
1681     break;
1682   case MAT_ROW_ORIENTED:
1683     MatCheckPreallocated(A, 1);
1684     a->roworiented = flg;
1685 
1686     PetscCall(MatSetOption(a->A, op, flg));
1687     PetscCall(MatSetOption(a->B, op, flg));
1688     break;
1689   case MAT_IGNORE_OFF_PROC_ENTRIES:
1690     a->donotstash = flg;
1691     break;
1692   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1693   case MAT_SPD:
1694   case MAT_SYMMETRIC:
1695   case MAT_STRUCTURALLY_SYMMETRIC:
1696   case MAT_HERMITIAN:
1697   case MAT_SYMMETRY_ETERNAL:
1698   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1699   case MAT_SPD_ETERNAL:
1700     /* if the diagonal matrix is square it inherits some of the properties above */
1701     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1702     break;
1703   case MAT_SUBMAT_SINGLEIS:
1704     A->submat_singleis = flg;
1705     break;
1706   default:
1707     break;
1708   }
1709   PetscFunctionReturn(PETSC_SUCCESS);
1710 }
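
/*
  Example of an option handled above (illustrative; `A` is the parallel matrix being
  assembled): telling the matrix that no process will set entries it does not own lets
  MatAssemblyBegin()/MatAssemblyEnd() skip the stash-and-communicate phase entirely.

    PetscCall(MatSetOption(A, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE));
*/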
1711 
1712 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1713 {
1714   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1715   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1716   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1717   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1718   PetscInt    *cmap, *idx_p;
1719 
1720   PetscFunctionBegin;
1721   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1722   mat->getrowactive = PETSC_TRUE;
1723 
1724   if (!mat->rowvalues && (idx || v)) {
1725     /*
1726         allocate enough space to hold information from the longest row.
1727     */
1728     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1729     PetscInt    max = 1, tmp;
1730     for (i = 0; i < matin->rmap->n; i++) {
1731       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1732       if (max < tmp) max = tmp;
1733     }
1734     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1735   }
1736 
1737   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1738   lrow = row - rstart;
1739 
1740   pvA = &vworkA;
1741   pcA = &cworkA;
1742   pvB = &vworkB;
1743   pcB = &cworkB;
1744   if (!v) {
1745     pvA = NULL;
1746     pvB = NULL;
1747   }
1748   if (!idx) {
1749     pcA = NULL;
1750     if (!v) pcB = NULL;
1751   }
1752   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1753   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1754   nztot = nzA + nzB;
1755 
1756   cmap = mat->garray;
1757   if (v || idx) {
1758     if (nztot) {
1759       /* Sort by increasing column numbers, assuming A and B already sorted */
1760       PetscInt imark = -1;
1761       if (v) {
1762         *v = v_p = mat->rowvalues;
1763         for (i = 0; i < nzB; i++) {
1764           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1765           else break;
1766         }
1767         imark = i;
1768         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1769         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1770       }
1771       if (idx) {
1772         *idx = idx_p = mat->rowindices;
1773         if (imark > -1) {
1774           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1775         } else {
1776           for (i = 0; i < nzB; i++) {
1777             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1778             else break;
1779           }
1780           imark = i;
1781         }
1782         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1783         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1784       }
1785     } else {
1786       if (idx) *idx = NULL;
1787       if (v) *v = NULL;
1788     }
1789   }
1790   *nz = nztot;
1791   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1792   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
1796 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1797 {
1798   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1799 
1800   PetscFunctionBegin;
1801   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1802   aij->getrowactive = PETSC_FALSE;
1803   PetscFunctionReturn(PETSC_SUCCESS);
1804 }
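
/*
  Minimal usage sketch for the two routines above (illustrative; `mat` is an assembled
  MATMPIAIJ matrix and only locally owned rows may be requested):

    PetscInt           rstart, rend, ncols;
    const PetscInt    *cols;
    const PetscScalar *vals;

    PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
    for (PetscInt row = rstart; row < rend; row++) {
      PetscCall(MatGetRow(mat, row, &ncols, &cols, &vals));
      ... cols[] are global indices in increasing order, vals[] the matching values ...
      PetscCall(MatRestoreRow(mat, row, &ncols, &cols, &vals));
    }
*/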
1805 
1806 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1807 {
1808   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1809   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1810   PetscInt         i, j;
1811   PetscReal        sum = 0.0;
1812   const MatScalar *v, *amata, *bmata;
1813 
1814   PetscFunctionBegin;
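  /* Norms computed below (a size-1 communicator just defers to the sequential code):
       NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 ), a local sum followed by a global reduction
       NORM_1:         max_j sum_i |a_ij|, assembled into a distributed column-sum vector
                       because each global column is split between the A and B blocks
       NORM_INFINITY:  max_i sum_j |a_ij|, a purely local row sum followed by an MPI max */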
1815   if (aij->size == 1) {
1816     PetscCall(MatNorm(aij->A, type, norm));
1817   } else {
1818     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1819     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1820     if (type == NORM_FROBENIUS) {
1821       v = amata;
1822       for (i = 0; i < amat->nz; i++) {
1823         sum += PetscRealPart(PetscConj(*v) * (*v));
1824         v++;
1825       }
1826       v = bmata;
1827       for (i = 0; i < bmat->nz; i++) {
1828         sum += PetscRealPart(PetscConj(*v) * (*v));
1829         v++;
1830       }
1831       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1832       *norm = PetscSqrtReal(*norm);
1833       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1834     } else if (type == NORM_1) { /* max column norm */
1835       Vec          col, bcol;
1836       PetscScalar *array;
1837       PetscInt    *jj, *garray = aij->garray;
1838 
1839       PetscCall(MatCreateVecs(mat, &col, NULL));
1840       PetscCall(VecSet(col, 0.0));
1841       PetscCall(VecGetArrayWrite(col, &array));
1842       v  = amata;
1843       jj = amat->j;
1844       for (j = 0; j < amat->nz; j++) array[*jj++] += PetscAbsScalar(*v++);
1845       PetscCall(VecRestoreArrayWrite(col, &array));
1846       PetscCall(MatCreateVecs(aij->B, &bcol, NULL));
1847       PetscCall(VecSet(bcol, 0.0));
1848       PetscCall(VecGetArrayWrite(bcol, &array));
1849       v  = bmata;
1850       jj = bmat->j;
1851       for (j = 0; j < bmat->nz; j++) array[*jj++] += PetscAbsScalar(*v++);
1852       PetscCall(VecSetValues(col, aij->B->cmap->n, garray, array, ADD_VALUES));
1853       PetscCall(VecRestoreArrayWrite(bcol, &array));
1854       PetscCall(VecDestroy(&bcol));
1855       PetscCall(VecAssemblyBegin(col));
1856       PetscCall(VecAssemblyEnd(col));
1857       PetscCall(VecNorm(col, NORM_INFINITY, norm));
1858       PetscCall(VecDestroy(&col));
1859     } else if (type == NORM_INFINITY) { /* max row norm */
1860       PetscReal ntemp = 0.0;
1861       for (j = 0; j < aij->A->rmap->n; j++) {
1862         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1863         sum = 0.0;
1864         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1865           sum += PetscAbsScalar(*v);
1866           v++;
1867         }
1868         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1869         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1870           sum += PetscAbsScalar(*v);
1871           v++;
1872         }
1873         if (sum > ntemp) ntemp = sum;
1874       }
1875       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1876       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1877     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1878     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1879     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1880   }
1881   PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883 
1884 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1885 {
1886   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1887   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1888   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1889   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1890   Mat              B, A_diag, *B_diag;
1891   const MatScalar *pbv, *bv;
1892 
1893   PetscFunctionBegin;
1894   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1895   ma = A->rmap->n;
1896   na = A->cmap->n;
1897   mb = a->B->rmap->n;
1898   nb = a->B->cmap->n;
1899   ai = Aloc->i;
1900   aj = Aloc->j;
1901   bi = Bloc->i;
1902   bj = Bloc->j;
1903   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1904     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1905     PetscSFNode         *oloc;
1906     PETSC_UNUSED PetscSF sf;
1907 
1908     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1909     /* compute d_nnz for preallocation */
1910     PetscCall(PetscArrayzero(d_nnz, na));
1911     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1912     /* compute local off-diagonal contributions */
1913     PetscCall(PetscArrayzero(g_nnz, nb));
1914     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1915     /* map those to global */
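    /* each ghost column of B is a leaf rooted at the process that owns that global column;
       summing g_nnz[] onto the roots gives o_nnz[i], the number of entries of row i of the
       transpose that arrive from other processes and must be preallocated off-diagonal */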
1916     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1917     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1918     PetscCall(PetscSFSetFromOptions(sf));
1919     PetscCall(PetscArrayzero(o_nnz, na));
1920     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1921     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1922     PetscCall(PetscSFDestroy(&sf));
1923 
1924     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1925     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1926     PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
1927     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1928     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1929     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1930   } else {
1931     B = *matout;
1932     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1933   }
1934 
1935   b           = (Mat_MPIAIJ *)B->data;
1936   A_diag      = a->A;
1937   B_diag      = &b->A;
1938   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1939   A_diag_ncol = A_diag->cmap->N;
1940   B_diag_ilen = sub_B_diag->ilen;
1941   B_diag_i    = sub_B_diag->i;
1942 
1943   /* Set ilen for diagonal of B */
1944   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1945 
1946   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1947   very quickly (=without using MatSetValues), because all writes are local. */
1948   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1949   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1950 
1951   /* copy over the B part */
1952   PetscCall(PetscMalloc1(bi[mb], &cols));
1953   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1954   pbv = bv;
1955   row = A->rmap->rstart;
1956   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1957   cols_tmp = cols;
1958   for (i = 0; i < mb; i++) {
1959     ncol = bi[i + 1] - bi[i];
1960     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1961     row++;
1962     if (pbv) pbv += ncol;
1963     if (cols_tmp) cols_tmp += ncol;
1964   }
1965   PetscCall(PetscFree(cols));
1966   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1967 
1968   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1969   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1970   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1971     *matout = B;
1972   } else {
1973     PetscCall(MatHeaderMerge(A, &B));
1974   }
1975   PetscFunctionReturn(PETSC_SUCCESS);
1976 }
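
/*
  Usage sketch (illustrative; `A` is an assembled MATMPIAIJ matrix):

    Mat At;
    PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
    ... change numerical values of A without changing its nonzero pattern ...
    PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &At));   reuses the structure of At
    PetscCall(MatDestroy(&At));
*/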
1977 
1978 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1979 {
1980   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1981   Mat         a = aij->A, b = aij->B;
1982   PetscInt    s1, s2, s3;
1983 
1984   PetscFunctionBegin;
1985   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1986   if (rr) {
1987     PetscCall(VecGetLocalSize(rr, &s1));
1988     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1989     /* Overlap communication with computation. */
1990     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1991   }
1992   if (ll) {
1993     PetscCall(VecGetLocalSize(ll, &s1));
1994     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1995     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1996   }
1997   /* scale the diagonal block */
1998   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1999 
2000   if (rr) {
2001     /* Do a scatter end and then right scale the off-diagonal block */
2002     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2003     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2004   }
2005   PetscFunctionReturn(PETSC_SUCCESS);
2006 }
2007 
2008 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2009 {
2010   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2011 
2012   PetscFunctionBegin;
2013   PetscCall(MatSetUnfactored(a->A));
2014   PetscFunctionReturn(PETSC_SUCCESS);
2015 }
2016 
2017 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2018 {
2019   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2020   Mat         a, b, c, d;
2021   PetscBool   flg;
2022 
2023   PetscFunctionBegin;
2024   a = matA->A;
2025   b = matA->B;
2026   c = matB->A;
2027   d = matB->B;
2028 
2029   PetscCall(MatEqual(a, c, &flg));
2030   if (flg) PetscCall(MatEqual(b, d, &flg));
2031   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2032   PetscFunctionReturn(PETSC_SUCCESS);
2033 }
2034 
2035 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2036 {
2037   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2038   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2039 
2040   PetscFunctionBegin;
2041   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2042   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2043     /* because of the column compression in the off-processor part of the matrix a->B,
2044        the number of columns in a->B and b->B may be different, hence we cannot call
2045        the MatCopy() directly on the two parts. If need be, we can provide a more
2046        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2047        then copying the submatrices */
2048     PetscCall(MatCopy_Basic(A, B, str));
2049   } else {
2050     PetscCall(MatCopy(a->A, b->A, str));
2051     PetscCall(MatCopy(a->B, b->B, str));
2052   }
2053   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2054   PetscFunctionReturn(PETSC_SUCCESS);
2055 }
2056 
2057 /*
2058    Computes the number of nonzeros per row needed for preallocation when X and Y
2059    have different nonzero structure.
2060 */
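/*
  For example, if row i of X has global columns {1, 4, 7} and row i of Y has {2, 4, 9},
  the merge below counts the union {1, 2, 4, 7, 9} and sets nnz[i] = 5.
*/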
2061 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2062 {
2063   PetscInt i, j, k, nzx, nzy;
2064 
2065   PetscFunctionBegin;
2066   /* Set the number of nonzeros in the new matrix */
2067   for (i = 0; i < m; i++) {
2068     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2069     nzx    = xi[i + 1] - xi[i];
2070     nzy    = yi[i + 1] - yi[i];
2071     nnz[i] = 0;
2072     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2073       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2074       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2075       nnz[i]++;
2076     }
2077     for (; k < nzy; k++) nnz[i]++;
2078   }
2079   PetscFunctionReturn(PETSC_SUCCESS);
2080 }
2081 
2082 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2083 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2084 {
2085   PetscInt    m = Y->rmap->N;
2086   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2087   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2088 
2089   PetscFunctionBegin;
2090   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2091   PetscFunctionReturn(PETSC_SUCCESS);
2092 }
2093 
2094 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2095 {
2096   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2097 
2098   PetscFunctionBegin;
2099   if (str == SAME_NONZERO_PATTERN) {
2100     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2101     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2102   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2103     PetscCall(MatAXPY_Basic(Y, a, X, str));
2104   } else {
2105     Mat       B;
2106     PetscInt *nnz_d, *nnz_o;
2107 
2108     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2109     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2110     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2111     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2112     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2113     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2114     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2115     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2116     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2117     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2118     PetscCall(MatHeaderMerge(Y, &B));
2119     PetscCall(PetscFree(nnz_d));
2120     PetscCall(PetscFree(nnz_o));
2121   }
2122   PetscFunctionReturn(PETSC_SUCCESS);
2123 }
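
/*
  Usage sketch (illustrative; X and Y are conforming MATMPIAIJ matrices and Y <- Y + 2*X is
  computed): when the nonzero patterns differ, the branch above builds a correctly
  preallocated result and then replaces Y in place via MatHeaderMerge().

    PetscCall(MatAXPY(Y, 2.0, X, DIFFERENT_NONZERO_PATTERN));
*/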
2124 
2125 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2126 
2127 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2128 {
2129   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2130 
2131   PetscFunctionBegin;
2132   PetscCall(MatConjugate_SeqAIJ(aij->A));
2133   PetscCall(MatConjugate_SeqAIJ(aij->B));
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatRealPart(a->A));
2143   PetscCall(MatRealPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2148 {
2149   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2150 
2151   PetscFunctionBegin;
2152   PetscCall(MatImaginaryPart(a->A));
2153   PetscCall(MatImaginaryPart(a->B));
2154   PetscFunctionReturn(PETSC_SUCCESS);
2155 }
2156 
2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2158 {
2159   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2160   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2161   PetscScalar       *vv;
2162   Vec                vB, vA;
2163   const PetscScalar *va, *vb;
2164 
2165   PetscFunctionBegin;
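  /* take the row-wise maximum absolute value of the diagonal and off-diagonal blocks
     separately and keep the larger of the two; on ties the smaller global column index wins,
     with B's compressed column indices translated through garray[] */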
2166   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2167   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2168 
2169   PetscCall(VecGetArrayRead(vA, &va));
2170   if (idx) {
2171     for (i = 0; i < m; i++) {
2172       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2173     }
2174   }
2175 
2176   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2177   PetscCall(PetscMalloc1(m, &idxb));
2178   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2179 
2180   PetscCall(VecGetArrayWrite(v, &vv));
2181   PetscCall(VecGetArrayRead(vB, &vb));
2182   for (i = 0; i < m; i++) {
2183     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2184       vv[i] = vb[i];
2185       if (idx) idx[i] = a->garray[idxb[i]];
2186     } else {
2187       vv[i] = va[i];
2188       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2189     }
2190   }
2191   PetscCall(VecRestoreArrayWrite(v, &vv));
2192   PetscCall(VecRestoreArrayRead(vA, &va));
2193   PetscCall(VecRestoreArrayRead(vB, &vb));
2194   PetscCall(PetscFree(idxb));
2195   PetscCall(VecDestroy(&vA));
2196   PetscCall(VecDestroy(&vB));
2197   PetscFunctionReturn(PETSC_SUCCESS);
2198 }
2199 
2200 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2201 {
2202   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2203   Vec         vB, vA;
2204 
2205   PetscFunctionBegin;
2206   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2207   PetscCall(MatGetRowSumAbs(a->A, vA));
2208   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2209   PetscCall(MatGetRowSumAbs(a->B, vB));
2210   PetscCall(VecAXPY(vA, 1.0, vB));
2211   PetscCall(VecDestroy(&vB));
2212   PetscCall(VecCopy(vA, v));
2213   PetscCall(VecDestroy(&vA));
2214   PetscFunctionReturn(PETSC_SUCCESS);
2215 }
2216 
2217 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2218 {
2219   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2220   PetscInt           m = A->rmap->n, n = A->cmap->n;
2221   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2222   PetscInt          *cmap = mat->garray;
2223   PetscInt          *diagIdx, *offdiagIdx;
2224   Vec                diagV, offdiagV;
2225   PetscScalar       *a, *diagA, *offdiagA;
2226   const PetscScalar *ba, *bav;
2227   PetscInt           r, j, col, ncols, *bi, *bj;
2228   Mat                B = mat->B;
2229   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2230 
2231   PetscFunctionBegin;
2232   /* When this process owns all columns of A (its off-diagonal block is empty) and the other processes own none */
2233   if (A->cmap->N == n) {
2234     PetscCall(VecGetArrayWrite(v, &diagA));
2235     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2236     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2237     PetscCall(VecDestroy(&diagV));
2238     PetscCall(VecRestoreArrayWrite(v, &diagA));
2239     PetscFunctionReturn(PETSC_SUCCESS);
2240   } else if (n == 0) {
2241     if (m) {
2242       PetscCall(VecGetArrayWrite(v, &a));
2243       for (r = 0; r < m; r++) {
2244         a[r] = 0.0;
2245         if (idx) idx[r] = -1;
2246       }
2247       PetscCall(VecRestoreArrayWrite(v, &a));
2248     }
2249     PetscFunctionReturn(PETSC_SUCCESS);
2250   }
2251 
2252   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2253   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2255   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2256 
2257   /* Get offdiagIdx[] for implicit 0.0 */
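  /* a "hole" is a global column outside the diagonal block with no stored entry in this row
     of B; its implicit value 0.0 always attains the minimum absolute value, so the first such
     column is recorded as the candidate index */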
2258   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2259   ba = bav;
2260   bi = b->i;
2261   bj = b->j;
2262   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2263   for (r = 0; r < m; r++) {
2264     ncols = bi[r + 1] - bi[r];
2265     if (ncols == A->cmap->N - n) { /* Brow is dense */
2266       offdiagA[r]   = *ba;
2267       offdiagIdx[r] = cmap[0];
2268     } else { /* Brow is sparse, so there is at least one implicit 0.0, which is the minimum in absolute value */
2269       offdiagA[r] = 0.0;
2270 
2271       /* Find first hole in the cmap */
2272       for (j = 0; j < ncols; j++) {
2273         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2274         if (col > j && j < cstart) {
2275           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2276           break;
2277         } else if (col > j + n && j >= cstart) {
2278           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2279           break;
2280         }
2281       }
2282       if (j == ncols && ncols < A->cmap->N - n) {
2283         /* a hole is outside compressed Bcols */
2284         if (ncols == 0) {
2285           if (cstart) {
2286             offdiagIdx[r] = 0;
2287           } else offdiagIdx[r] = cend;
2288         } else { /* ncols > 0 */
2289           offdiagIdx[r] = cmap[ncols - 1] + 1;
2290           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2291         }
2292       }
2293     }
2294 
2295     for (j = 0; j < ncols; j++) {
2296       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2297         offdiagA[r]   = *ba;
2298         offdiagIdx[r] = cmap[*bj];
2299       }
2300       ba++;
2301       bj++;
2302     }
2303   }
2304 
2305   PetscCall(VecGetArrayWrite(v, &a));
2306   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2307   for (r = 0; r < m; ++r) {
2308     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2309       a[r] = diagA[r];
2310       if (idx) idx[r] = cstart + diagIdx[r];
2311     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2312       a[r] = diagA[r];
2313       if (idx) {
2314         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2315           idx[r] = cstart + diagIdx[r];
2316         } else idx[r] = offdiagIdx[r];
2317       }
2318     } else {
2319       a[r] = offdiagA[r];
2320       if (idx) idx[r] = offdiagIdx[r];
2321     }
2322   }
2323   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2324   PetscCall(VecRestoreArrayWrite(v, &a));
2325   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2326   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2327   PetscCall(VecDestroy(&diagV));
2328   PetscCall(VecDestroy(&offdiagV));
2329   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2330   PetscFunctionReturn(PETSC_SUCCESS);
2331 }
2332 
2333 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2334 {
2335   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2336   PetscInt           m = A->rmap->n, n = A->cmap->n;
2337   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2338   PetscInt          *cmap = mat->garray;
2339   PetscInt          *diagIdx, *offdiagIdx;
2340   Vec                diagV, offdiagV;
2341   PetscScalar       *a, *diagA, *offdiagA;
2342   const PetscScalar *ba, *bav;
2343   PetscInt           r, j, col, ncols, *bi, *bj;
2344   Mat                B = mat->B;
2345   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2346 
2347   PetscFunctionBegin;
2348   /* When this process owns all columns of A (its off-diagonal block is empty) and the other processes own none */
2349   if (A->cmap->N == n) {
2350     PetscCall(VecGetArrayWrite(v, &diagA));
2351     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2352     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2353     PetscCall(VecDestroy(&diagV));
2354     PetscCall(VecRestoreArrayWrite(v, &diagA));
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   } else if (n == 0) {
2357     if (m) {
2358       PetscCall(VecGetArrayWrite(v, &a));
2359       for (r = 0; r < m; r++) {
2360         a[r] = PETSC_MAX_REAL;
2361         if (idx) idx[r] = -1;
2362       }
2363       PetscCall(VecRestoreArrayWrite(v, &a));
2364     }
2365     PetscFunctionReturn(PETSC_SUCCESS);
2366   }
2367 
2368   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2369   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2371   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2372 
2373   /* Get offdiagIdx[] for implicit 0.0 */
2374   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2375   ba = bav;
2376   bi = b->i;
2377   bj = b->j;
2378   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2379   for (r = 0; r < m; r++) {
2380     ncols = bi[r + 1] - bi[r];
2381     if (ncols == A->cmap->N - n) { /* Brow is dense */
2382       offdiagA[r]   = *ba;
2383       offdiagIdx[r] = cmap[0];
2384     } else { /* Brow is sparse, so there is at least one implicit 0.0 and the off-diagonal minimum is at most 0.0 */
2385       offdiagA[r] = 0.0;
2386 
2387       /* Find first hole in the cmap */
2388       for (j = 0; j < ncols; j++) {
2389         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2390         if (col > j && j < cstart) {
2391           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2392           break;
2393         } else if (col > j + n && j >= cstart) {
2394           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2395           break;
2396         }
2397       }
2398       if (j == ncols && ncols < A->cmap->N - n) {
2399         /* a hole is outside compressed Bcols */
2400         if (ncols == 0) {
2401           if (cstart) {
2402             offdiagIdx[r] = 0;
2403           } else offdiagIdx[r] = cend;
2404         } else { /* ncols > 0 */
2405           offdiagIdx[r] = cmap[ncols - 1] + 1;
2406           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2407         }
2408       }
2409     }
2410 
2411     for (j = 0; j < ncols; j++) {
2412       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2413         offdiagA[r]   = *ba;
2414         offdiagIdx[r] = cmap[*bj];
2415       }
2416       ba++;
2417       bj++;
2418     }
2419   }
2420 
2421   PetscCall(VecGetArrayWrite(v, &a));
2422   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2423   for (r = 0; r < m; ++r) {
2424     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2425       a[r] = diagA[r];
2426       if (idx) idx[r] = cstart + diagIdx[r];
2427     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2428       a[r] = diagA[r];
2429       if (idx) {
2430         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2431           idx[r] = cstart + diagIdx[r];
2432         } else idx[r] = offdiagIdx[r];
2433       }
2434     } else {
2435       a[r] = offdiagA[r];
2436       if (idx) idx[r] = offdiagIdx[r];
2437     }
2438   }
2439   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2440   PetscCall(VecRestoreArrayWrite(v, &a));
2441   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2442   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2443   PetscCall(VecDestroy(&diagV));
2444   PetscCall(VecDestroy(&offdiagV));
2445   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2446   PetscFunctionReturn(PETSC_SUCCESS);
2447 }
2448 
2449 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2450 {
2451   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2452   PetscInt           m = A->rmap->n, n = A->cmap->n;
2453   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2454   PetscInt          *cmap = mat->garray;
2455   PetscInt          *diagIdx, *offdiagIdx;
2456   Vec                diagV, offdiagV;
2457   PetscScalar       *a, *diagA, *offdiagA;
2458   const PetscScalar *ba, *bav;
2459   PetscInt           r, j, col, ncols, *bi, *bj;
2460   Mat                B = mat->B;
2461   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2462 
2463   PetscFunctionBegin;
2464   /* When this process owns all columns of A (its off-diagonal block is empty) and the other processes own none */
2465   if (A->cmap->N == n) {
2466     PetscCall(VecGetArrayWrite(v, &diagA));
2467     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2468     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2469     PetscCall(VecDestroy(&diagV));
2470     PetscCall(VecRestoreArrayWrite(v, &diagA));
2471     PetscFunctionReturn(PETSC_SUCCESS);
2472   } else if (n == 0) {
2473     if (m) {
2474       PetscCall(VecGetArrayWrite(v, &a));
2475       for (r = 0; r < m; r++) {
2476         a[r] = PETSC_MIN_REAL;
2477         if (idx) idx[r] = -1;
2478       }
2479       PetscCall(VecRestoreArrayWrite(v, &a));
2480     }
2481     PetscFunctionReturn(PETSC_SUCCESS);
2482   }
2483 
2484   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2485   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2487   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2488 
2489   /* Get offdiagIdx[] for implicit 0.0 */
2490   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2491   ba = bav;
2492   bi = b->i;
2493   bj = b->j;
2494   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2495   for (r = 0; r < m; r++) {
2496     ncols = bi[r + 1] - bi[r];
2497     if (ncols == A->cmap->N - n) { /* Brow is dense */
2498       offdiagA[r]   = *ba;
2499       offdiagIdx[r] = cmap[0];
2500     } else { /* the row of B is sparse, so we already know the maximum is 0.0 or higher */
2501       offdiagA[r] = 0.0;
2502 
2503       /* Find first hole in the cmap */
2504       for (j = 0; j < ncols; j++) {
2505         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2506         if (col > j && j < cstart) {
2507           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2508           break;
2509         } else if (col > j + n && j >= cstart) {
2510           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2511           break;
2512         }
2513       }
2514       if (j == ncols && ncols < A->cmap->N - n) {
2515         /* a hole is outside compressed Bcols */
2516         if (ncols == 0) {
2517           if (cstart) {
2518             offdiagIdx[r] = 0;
2519           } else offdiagIdx[r] = cend;
2520         } else { /* ncols > 0 */
2521           offdiagIdx[r] = cmap[ncols - 1] + 1;
2522           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2523         }
2524       }
2525     }
2526 
2527     for (j = 0; j < ncols; j++) {
2528       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2529         offdiagA[r]   = *ba;
2530         offdiagIdx[r] = cmap[*bj];
2531       }
2532       ba++;
2533       bj++;
2534     }
2535   }
2536 
2537   PetscCall(VecGetArrayWrite(v, &a));
2538   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2539   for (r = 0; r < m; ++r) {
2540     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2541       a[r] = diagA[r];
2542       if (idx) idx[r] = cstart + diagIdx[r];
2543     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2544       a[r] = diagA[r];
2545       if (idx) {
2546         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2547           idx[r] = cstart + diagIdx[r];
2548         } else idx[r] = offdiagIdx[r];
2549       }
2550     } else {
2551       a[r] = offdiagA[r];
2552       if (idx) idx[r] = offdiagIdx[r];
2553     }
2554   }
2555   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2556   PetscCall(VecRestoreArrayWrite(v, &a));
2557   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2558   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2559   PetscCall(VecDestroy(&diagV));
2560   PetscCall(VecDestroy(&offdiagV));
2561   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2562   PetscFunctionReturn(PETSC_SUCCESS);
2563 }
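/*
  Illustrative usage sketch (editorial addition, not part of the original source): how a caller
  typically obtains the per-row maxima of an assembled MATMPIAIJ matrix A together with the global
  column indices where they occur; the same pattern applies to MatGetRowMin(). The left vector
  returned by MatCreateVecs() conforms to the row layout of A.

    Vec       rmax;
    PetscInt *rowidx, m;

    PetscCall(MatGetLocalSize(A, &m, NULL));
    PetscCall(PetscMalloc1(m, &rowidx));
    PetscCall(MatCreateVecs(A, NULL, &rmax));
    PetscCall(MatGetRowMax(A, rmax, rowidx)); // rowidx may be NULL if the locations are not needed
    PetscCall(PetscFree(rowidx));
    PetscCall(VecDestroy(&rmax));
*/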
2564 
2565 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2566 {
2567   Mat *dummy;
2568 
2569   PetscFunctionBegin;
2570   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2571   *newmat = *dummy;
2572   PetscCall(PetscFree(dummy));
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2577 {
2578   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2579 
2580   PetscFunctionBegin;
2581   PetscCall(MatInvertBlockDiagonal(a->A, values));
2582   A->factorerrortype = a->A->factorerrortype;
2583   PetscFunctionReturn(PETSC_SUCCESS);
2584 }
2585 
2586 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2587 {
2588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2589 
2590   PetscFunctionBegin;
2591   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2592   PetscCall(MatSetRandom(aij->A, rctx));
2593   if (x->assembled) {
2594     PetscCall(MatSetRandom(aij->B, rctx));
2595   } else {
2596     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2597   }
2598   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2599   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2600   PetscFunctionReturn(PETSC_SUCCESS);
2601 }
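/*
  Illustrative usage sketch (editorial addition, not part of the original source): filling a
  preallocated or assembled MATMPIAIJ matrix A with random values. Passing NULL instead of rctx
  lets MatSetRandom() create a default random context internally.

    PetscRandom rctx;

    PetscCall(PetscRandomCreate(PetscObjectComm((PetscObject)A), &rctx));
    PetscCall(PetscRandomSetFromOptions(rctx));
    PetscCall(MatSetRandom(A, rctx));
    PetscCall(PetscRandomDestroy(&rctx));
*/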
2602 
2603 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2604 {
2605   PetscFunctionBegin;
2606   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2607   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2608   PetscFunctionReturn(PETSC_SUCCESS);
2609 }
2610 
2611 /*@
2612   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2613 
2614   Not Collective
2615 
2616   Input Parameter:
2617 . A - the matrix
2618 
2619   Output Parameter:
2620 . nz - the number of nonzeros
2621 
2622   Level: advanced
2623 
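  Example Usage (a minimal sketch; assumes `A` is an assembled `MATMPIAIJ` matrix):
.vb
  PetscCount nz;

  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
.ve
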
2624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2625 @*/
2626 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2627 {
2628   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2629   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2630   PetscBool   isaij;
2631 
2632   PetscFunctionBegin;
2633   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2634   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2635   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2636   PetscFunctionReturn(PETSC_SUCCESS);
2637 }
2638 
2639 /*@
2640   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2641 
2642   Collective
2643 
2644   Input Parameters:
2645 + A  - the matrix
2646 - sc - `PETSC_TRUE` indicates the scalable algorithm should be used (by default the scalable algorithm is not used)
2647 
2648   Level: advanced
2649 
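  Example Usage (a minimal sketch; assumes `A` is a `MATMPIAIJ` matrix):
.vb
  PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
.ve

  The same choice can be made at runtime with the option -mat_increase_overlap_scalable, which is
  processed by `MatSetFromOptions()`.
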
2650 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2651 @*/
2652 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2653 {
2654   PetscFunctionBegin;
2655   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2656   PetscFunctionReturn(PETSC_SUCCESS);
2657 }
2658 
2659 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
2660 {
2661   PetscBool sc = PETSC_FALSE, flg;
2662 
2663   PetscFunctionBegin;
2664   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2665   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2666   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2667   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2668   PetscOptionsHeadEnd();
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2673 {
2674   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2675   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2676 
2677   PetscFunctionBegin;
2678   if (!Y->preallocated) {
2679     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2680   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2681     PetscInt nonew = aij->nonew;
2682     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2683     aij->nonew = nonew;
2684   }
2685   PetscCall(MatShift_Basic(Y, a));
2686   PetscFunctionReturn(PETSC_SUCCESS);
2687 }
2688 
2689 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2690 {
2691   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2692 
2693   PetscFunctionBegin;
2694   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2695   PetscFunctionReturn(PETSC_SUCCESS);
2696 }
2697 
2698 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2704   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2705   PetscFunctionReturn(PETSC_SUCCESS);
2706 }
2707 
2708 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2709                                        MatGetRow_MPIAIJ,
2710                                        MatRestoreRow_MPIAIJ,
2711                                        MatMult_MPIAIJ,
2712                                        /* 4*/ MatMultAdd_MPIAIJ,
2713                                        MatMultTranspose_MPIAIJ,
2714                                        MatMultTransposeAdd_MPIAIJ,
2715                                        NULL,
2716                                        NULL,
2717                                        NULL,
2718                                        /*10*/ NULL,
2719                                        NULL,
2720                                        NULL,
2721                                        MatSOR_MPIAIJ,
2722                                        MatTranspose_MPIAIJ,
2723                                        /*15*/ MatGetInfo_MPIAIJ,
2724                                        MatEqual_MPIAIJ,
2725                                        MatGetDiagonal_MPIAIJ,
2726                                        MatDiagonalScale_MPIAIJ,
2727                                        MatNorm_MPIAIJ,
2728                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2729                                        MatAssemblyEnd_MPIAIJ,
2730                                        MatSetOption_MPIAIJ,
2731                                        MatZeroEntries_MPIAIJ,
2732                                        /*24*/ MatZeroRows_MPIAIJ,
2733                                        NULL,
2734                                        NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        /*29*/ MatSetUp_MPI_Hash,
2738                                        NULL,
2739                                        NULL,
2740                                        MatGetDiagonalBlock_MPIAIJ,
2741                                        NULL,
2742                                        /*34*/ MatDuplicate_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        /*39*/ MatAXPY_MPIAIJ,
2748                                        MatCreateSubMatrices_MPIAIJ,
2749                                        MatIncreaseOverlap_MPIAIJ,
2750                                        MatGetValues_MPIAIJ,
2751                                        MatCopy_MPIAIJ,
2752                                        /*44*/ MatGetRowMax_MPIAIJ,
2753                                        MatScale_MPIAIJ,
2754                                        MatShift_MPIAIJ,
2755                                        MatDiagonalSet_MPIAIJ,
2756                                        MatZeroRowsColumns_MPIAIJ,
2757                                        /*49*/ MatSetRandom_MPIAIJ,
2758                                        MatGetRowIJ_MPIAIJ,
2759                                        MatRestoreRowIJ_MPIAIJ,
2760                                        NULL,
2761                                        NULL,
2762                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2763                                        NULL,
2764                                        MatSetUnfactored_MPIAIJ,
2765                                        MatPermute_MPIAIJ,
2766                                        NULL,
2767                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2768                                        MatDestroy_MPIAIJ,
2769                                        MatView_MPIAIJ,
2770                                        NULL,
2771                                        NULL,
2772                                        /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2773                                        NULL,
2774                                        NULL,
2775                                        NULL,
2776                                        MatGetRowMaxAbs_MPIAIJ,
2777                                        /*69*/ MatGetRowMinAbs_MPIAIJ,
2778                                        NULL,
2779                                        NULL,
2780                                        MatFDColoringApply_AIJ,
2781                                        MatSetFromOptions_MPIAIJ,
2782                                        MatFindZeroDiagonals_MPIAIJ,
2783                                        /*75*/ NULL,
2784                                        NULL,
2785                                        NULL,
2786                                        MatLoad_MPIAIJ,
2787                                        NULL,
2788                                        /*80*/ NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        /*83*/ NULL,
2792                                        NULL,
2793                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2794                                        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        /*89*/ MatBindToCPU_MPIAIJ,
2798                                        MatProductSetFromOptions_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                        MatConjugate_MPIAIJ,
2802                                        /*94*/ NULL,
2803                                        MatSetValuesRow_MPIAIJ,
2804                                        MatRealPart_MPIAIJ,
2805                                        MatImaginaryPart_MPIAIJ,
2806                                        NULL,
2807                                        /*99*/ NULL,
2808                                        NULL,
2809                                        NULL,
2810                                        MatGetRowMin_MPIAIJ,
2811                                        NULL,
2812                                        /*104*/ MatGetSeqNonzeroStructure_MPIAIJ,
2813                                        NULL,
2814                                        MatGetGhosts_MPIAIJ,
2815                                        NULL,
2816                                        NULL,
2817                                        /*109*/ MatMultDiagonalBlock_MPIAIJ,
2818                                        NULL,
2819                                        NULL,
2820                                        NULL,
2821                                        MatGetMultiProcBlock_MPIAIJ,
2822                                        /*114*/ MatFindNonzeroRows_MPIAIJ,
2823                                        MatGetColumnReductions_MPIAIJ,
2824                                        MatInvertBlockDiagonal_MPIAIJ,
2825                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2826                                        MatCreateSubMatricesMPI_MPIAIJ,
2827                                        /*119*/ NULL,
2828                                        NULL,
2829                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2830                                        NULL,
2831                                        NULL,
2832                                        /*124*/ NULL,
2833                                        NULL,
2834                                        MatSetBlockSizes_MPIAIJ,
2835                                        NULL,
2836                                        MatFDColoringSetUp_MPIXAIJ,
2837                                        /*129*/ MatFindOffBlockDiagonalEntries_MPIAIJ,
2838                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2839                                        NULL,
2840                                        NULL,
2841                                        NULL,
2842                                        /*134*/ MatCreateGraph_Simple_AIJ,
2843                                        NULL,
2844                                        MatEliminateZeros_MPIAIJ,
2845                                        MatGetRowSumAbs_MPIAIJ,
2846                                        NULL,
2847                                        /*139*/ NULL,
2848                                        NULL,
2849                                        MatCopyHashToXAIJ_MPI_Hash,
2850                                        MatGetCurrentMemType_MPIAIJ,
2851                                        NULL};
2852 
2853 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2854 {
2855   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2856 
2857   PetscFunctionBegin;
2858   PetscCall(MatStoreValues(aij->A));
2859   PetscCall(MatStoreValues(aij->B));
2860   PetscFunctionReturn(PETSC_SUCCESS);
2861 }
2862 
2863 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2864 {
2865   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2866 
2867   PetscFunctionBegin;
2868   PetscCall(MatRetrieveValues(aij->A));
2869   PetscCall(MatRetrieveValues(aij->B));
2870   PetscFunctionReturn(PETSC_SUCCESS);
2871 }
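/*
  Illustrative usage sketch (editorial addition, not part of the original source): the usual
  store/retrieve cycle on an assembled matrix. MatStoreValues() requires that the nonzero pattern
  stay fixed, hence the MAT_NEW_NONZERO_LOCATIONS option below; the saved values are restored later
  with MatRetrieveValues(), e.g. at the start of each nonlinear iteration.

    PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
    PetscCall(MatStoreValues(mat));
    // ... modify the matrix values, keeping the same sparsity pattern ...
    PetscCall(MatRetrieveValues(mat));
*/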
2872 
2873 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2874 {
2875   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2876   PetscMPIInt size;
2877 
2878   PetscFunctionBegin;
2879   if (B->hash_active) {
2880     B->ops[0]      = b->cops;
2881     B->hash_active = PETSC_FALSE;
2882   }
2883   PetscCall(PetscLayoutSetUp(B->rmap));
2884   PetscCall(PetscLayoutSetUp(B->cmap));
2885 
2886 #if defined(PETSC_USE_CTABLE)
2887   PetscCall(PetscHMapIDestroy(&b->colmap));
2888 #else
2889   PetscCall(PetscFree(b->colmap));
2890 #endif
2891   PetscCall(PetscFree(b->garray));
2892   PetscCall(VecDestroy(&b->lvec));
2893   PetscCall(VecScatterDestroy(&b->Mvctx));
2894 
2895   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2896 
2897   MatSeqXAIJGetOptions_Private(b->B);
2898   PetscCall(MatDestroy(&b->B));
2899   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2900   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2901   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2902   PetscCall(MatSetType(b->B, MATSEQAIJ));
2903   MatSeqXAIJRestoreOptions_Private(b->B);
2904 
2905   MatSeqXAIJGetOptions_Private(b->A);
2906   PetscCall(MatDestroy(&b->A));
2907   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2908   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2909   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2910   PetscCall(MatSetType(b->A, MATSEQAIJ));
2911   MatSeqXAIJRestoreOptions_Private(b->A);
2912 
2913   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2914   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2915   B->preallocated  = PETSC_TRUE;
2916   B->was_assembled = PETSC_FALSE;
2917   B->assembled     = PETSC_FALSE;
2918   PetscFunctionReturn(PETSC_SUCCESS);
2919 }
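/*
  Illustrative usage sketch (editorial addition, not part of the original source): caller-side
  preallocation of a MATMPIAIJ matrix through the public interface MatMPIAIJSetPreallocation(),
  which dispatches to the routine above. Here m and n are local sizes chosen by the caller, and the
  per-row estimates (5 for the diagonal block, 2 for the off-diagonal block) are assumptions for
  the sketch; exact counts can be supplied through the d_nnz/o_nnz arrays instead.

    Mat A;

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
    // ... MatSetValues() ..., then MatAssemblyBegin()/MatAssemblyEnd()
*/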
2920 
2921 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2922 {
2923   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2924   PetscBool   ondiagreset, offdiagreset, memoryreset;
2925 
2926   PetscFunctionBegin;
2927   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2928   PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
2929   if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);
2930 
2931   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
2932   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
2933   memoryreset = (PetscBool)(ondiagreset || offdiagreset);
2934   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
2935   if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);
2936 
2937   PetscCall(PetscLayoutSetUp(B->rmap));
2938   PetscCall(PetscLayoutSetUp(B->cmap));
2939   PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
2940   PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2941   PetscCall(VecScatterDestroy(&b->Mvctx));
2942 
2943   B->preallocated  = PETSC_TRUE;
2944   B->was_assembled = PETSC_FALSE;
2945   B->assembled     = PETSC_FALSE;
2946   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2947   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2948   PetscFunctionReturn(PETSC_SUCCESS);
2949 }
2950 
2951 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2952 {
2953   Mat         mat;
2954   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2955 
2956   PetscFunctionBegin;
2957   *newmat = NULL;
2958   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2959   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2960   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2961   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2962   a = (Mat_MPIAIJ *)mat->data;
2963 
2964   mat->factortype = matin->factortype;
2965   mat->assembled  = matin->assembled;
2966   mat->insertmode = NOT_SET_VALUES;
2967 
2968   a->size         = oldmat->size;
2969   a->rank         = oldmat->rank;
2970   a->donotstash   = oldmat->donotstash;
2971   a->roworiented  = oldmat->roworiented;
2972   a->rowindices   = NULL;
2973   a->rowvalues    = NULL;
2974   a->getrowactive = PETSC_FALSE;
2975 
2976   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2977   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2978   if (matin->hash_active) {
2979     PetscCall(MatSetUp(mat));
2980   } else {
2981     mat->preallocated = matin->preallocated;
2982     if (oldmat->colmap) {
2983 #if defined(PETSC_USE_CTABLE)
2984       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2985 #else
2986       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2987       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2988 #endif
2989     } else a->colmap = NULL;
2990     if (oldmat->garray) {
2991       PetscInt len;
2992       len = oldmat->B->cmap->n;
2993       PetscCall(PetscMalloc1(len, &a->garray));
2994       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2995     } else a->garray = NULL;
2996 
2997     /* MatDuplicate() may be called on a non-assembled matrix;
2998       in fact, it only requires the matrix to be preallocated.
2999       This may happen, for example, inside a DMCreateMatrix_Shell */
3000     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3001     if (oldmat->Mvctx) {
3002       a->Mvctx = oldmat->Mvctx;
3003       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3004     }
3005     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3006     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3007   }
3008   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3009   *newmat = mat;
3010   PetscFunctionReturn(PETSC_SUCCESS);
3011 }
3012 
3013 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3014 {
3015   PetscBool isbinary, ishdf5;
3016 
3017   PetscFunctionBegin;
3018   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3019   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3020   /* force binary viewer to load .info file if it has not yet done so */
3021   PetscCall(PetscViewerSetUp(viewer));
3022   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3023   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3024   if (isbinary) {
3025     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3026   } else if (ishdf5) {
3027 #if defined(PETSC_HAVE_HDF5)
3028     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3029 #else
3030     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3031 #endif
3032   } else {
3033     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3034   }
3035   PetscFunctionReturn(PETSC_SUCCESS);
3036 }
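/*
  Illustrative usage sketch (editorial addition, not part of the original source): loading a
  MATMPIAIJ matrix from a PETSc binary file; the file name "matrix.dat" is a placeholder.

    PetscViewer viewer;
    Mat         A;

    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatLoad(A, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
*/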
3037 
3038 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3039 {
3040   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3041   PetscInt    *rowidxs, *colidxs;
3042   PetscScalar *matvals;
3043 
3044   PetscFunctionBegin;
3045   PetscCall(PetscViewerSetUp(viewer));
3046 
3047   /* read in matrix header */
3048   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3049   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3050   M  = header[1];
3051   N  = header[2];
3052   nz = header[3];
3053   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3054   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3055   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3056 
3057   /* set block sizes from the viewer's .info file */
3058   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3059   /* set global sizes if not set already */
3060   if (mat->rmap->N < 0) mat->rmap->N = M;
3061   if (mat->cmap->N < 0) mat->cmap->N = N;
3062   PetscCall(PetscLayoutSetUp(mat->rmap));
3063   PetscCall(PetscLayoutSetUp(mat->cmap));
3064 
3065   /* check if the matrix sizes are correct */
3066   PetscCall(MatGetSize(mat, &rows, &cols));
3067   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3068 
3069   /* read in row lengths and build row indices */
3070   PetscCall(MatGetLocalSize(mat, &m, NULL));
3071   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3072   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3073   rowidxs[0] = 0;
3074   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3075   if (nz != PETSC_INT_MAX) {
3076     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3077     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3078   }
3079 
3080   /* read in column indices and matrix values */
3081   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3082   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3083   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3084   /* store matrix indices and values */
3085   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3086   PetscCall(PetscFree(rowidxs));
3087   PetscCall(PetscFree2(colidxs, matvals));
3088   PetscFunctionReturn(PETSC_SUCCESS);
3089 }
3090 
3091 /* Not scalable because of ISAllGather() unless getting all columns. */
3092 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3093 {
3094   IS          iscol_local;
3095   PetscBool   isstride;
3096   PetscMPIInt gisstride = 0;
3097 
3098   PetscFunctionBegin;
3099   /* check if we are grabbing all columns */
3100   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3101 
3102   if (isstride) {
3103     PetscInt start, len, mstart, mlen;
3104     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3105     PetscCall(ISGetLocalSize(iscol, &len));
3106     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3107     if (mstart == start && mlen - mstart == len) gisstride = 1;
3108   }
3109 
3110   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3111   if (gisstride) {
3112     PetscInt N;
3113     PetscCall(MatGetSize(mat, NULL, &N));
3114     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3115     PetscCall(ISSetIdentity(iscol_local));
3116     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3117   } else {
3118     PetscInt cbs;
3119     PetscCall(ISGetBlockSize(iscol, &cbs));
3120     PetscCall(ISAllGather(iscol, &iscol_local));
3121     PetscCall(ISSetBlockSize(iscol_local, cbs));
3122   }
3123 
3124   *isseq = iscol_local;
3125   PetscFunctionReturn(PETSC_SUCCESS);
3126 }
3127 
3128 /*
3129  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3130  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3131 
3132  Input Parameters:
3133 +   mat - matrix
3134 +   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3135            i.e., mat->rstart <= isrow[i] < mat->rend
3136 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3137            i.e., mat->cstart <= iscol[i] < mat->cend
3138 
3139  Output Parameters:
3140 +   isrow_d - sequential row index set for retrieving mat->A
3141 .   iscol_d - sequential  column index set for retrieving mat->A
3142 .   iscol_o - sequential column index set for retrieving mat->B
3143 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3144  */
3145 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3146 {
3147   Vec             x, cmap;
3148   const PetscInt *is_idx;
3149   PetscScalar    *xarray, *cmaparray;
3150   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3151   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3152   Mat             B    = a->B;
3153   Vec             lvec = a->lvec, lcmap;
3154   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3155   MPI_Comm        comm;
3156   VecScatter      Mvctx = a->Mvctx;
3157 
3158   PetscFunctionBegin;
3159   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3160   PetscCall(ISGetLocalSize(iscol, &ncols));
3161 
3162   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3163   PetscCall(MatCreateVecs(mat, &x, NULL));
3164   PetscCall(VecSet(x, -1.0));
3165   PetscCall(VecDuplicate(x, &cmap));
3166   PetscCall(VecSet(cmap, -1.0));
3167 
3168   /* Get start indices */
3169   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3170   isstart -= ncols;
3171   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3172 
3173   PetscCall(ISGetIndices(iscol, &is_idx));
3174   PetscCall(VecGetArray(x, &xarray));
3175   PetscCall(VecGetArray(cmap, &cmaparray));
3176   PetscCall(PetscMalloc1(ncols, &idx));
3177   for (i = 0; i < ncols; i++) {
3178     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3179     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3180     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3181   }
3182   PetscCall(VecRestoreArray(x, &xarray));
3183   PetscCall(VecRestoreArray(cmap, &cmaparray));
3184   PetscCall(ISRestoreIndices(iscol, &is_idx));
3185 
3186   /* Get iscol_d */
3187   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3188   PetscCall(ISGetBlockSize(iscol, &i));
3189   PetscCall(ISSetBlockSize(*iscol_d, i));
3190 
3191   /* Get isrow_d */
3192   PetscCall(ISGetLocalSize(isrow, &m));
3193   rstart = mat->rmap->rstart;
3194   PetscCall(PetscMalloc1(m, &idx));
3195   PetscCall(ISGetIndices(isrow, &is_idx));
3196   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3197   PetscCall(ISRestoreIndices(isrow, &is_idx));
3198 
3199   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3200   PetscCall(ISGetBlockSize(isrow, &i));
3201   PetscCall(ISSetBlockSize(*isrow_d, i));
3202 
3203   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3204   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3205   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3206 
3207   PetscCall(VecDuplicate(lvec, &lcmap));
3208 
3209   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3210   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3211 
3212   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3213   /* off-process column indices */
3214   count = 0;
3215   PetscCall(PetscMalloc1(Bn, &idx));
3216   PetscCall(PetscMalloc1(Bn, &cmap1));
3217 
3218   PetscCall(VecGetArray(lvec, &xarray));
3219   PetscCall(VecGetArray(lcmap, &cmaparray));
3220   for (i = 0; i < Bn; i++) {
3221     if (PetscRealPart(xarray[i]) > -1.0) {
3222       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3223       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3224       count++;
3225     }
3226   }
3227   PetscCall(VecRestoreArray(lvec, &xarray));
3228   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3229 
3230   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3231   /* cannot ensure iscol_o has same blocksize as iscol! */
3232 
3233   PetscCall(PetscFree(idx));
3234   *garray = cmap1;
3235 
3236   PetscCall(VecDestroy(&x));
3237   PetscCall(VecDestroy(&cmap));
3238   PetscCall(VecDestroy(&lcmap));
3239   PetscFunctionReturn(PETSC_SUCCESS);
3240 }
3241 
3242 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3243 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3244 {
3245   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3246   Mat         M = NULL;
3247   MPI_Comm    comm;
3248   IS          iscol_d, isrow_d, iscol_o;
3249   Mat         Asub = NULL, Bsub = NULL;
3250   PetscInt    n, count, M_size, N_size;
3251 
3252   PetscFunctionBegin;
3253   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3254 
3255   if (call == MAT_REUSE_MATRIX) {
3256     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3257     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3258     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3259 
3260     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3261     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3262 
3263     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3264     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3265 
3266     /* Update diagonal and off-diagonal portions of submat */
3267     asub = (Mat_MPIAIJ *)(*submat)->data;
3268     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3269     PetscCall(ISGetLocalSize(iscol_o, &n));
3270     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3271     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3272     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3273 
3274   } else { /* call == MAT_INITIAL_MATRIX) */
3275     PetscInt *garray, *garray_compact;
3276     PetscInt  BsubN;
3277 
3278     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3279     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3280 
3281     /* Create local submatrices Asub and Bsub */
3282     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3283     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3284 
3285     // Compact garray so it is not of size Bn
3286     PetscCall(ISGetSize(iscol_o, &count));
3287     PetscCall(PetscMalloc1(count, &garray_compact));
3288     PetscCall(PetscArraycpy(garray_compact, garray, count));
3289 
3290     /* Create submatrix M */
3291     PetscCall(ISGetSize(isrow, &M_size));
3292     PetscCall(ISGetSize(iscol, &N_size));
3293     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));
3294 
3295     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3296     asub = (Mat_MPIAIJ *)M->data;
3297 
3298     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3299     n = asub->B->cmap->N;
3300     if (BsubN > n) {
3301       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3302       const PetscInt *idx;
3303       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3304       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3305 
3306       PetscCall(PetscMalloc1(n, &idx_new));
3307       j = 0;
3308       PetscCall(ISGetIndices(iscol_o, &idx));
3309       for (i = 0; i < n; i++) {
3310         if (j >= BsubN) break;
3311         while (subgarray[i] > garray[j]) j++;
3312 
3313         PetscCheck(subgarray[i] == garray[j], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be less than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3314         idx_new[i] = idx[j++];
3315       }
3316       PetscCall(ISRestoreIndices(iscol_o, &idx));
3317 
3318       PetscCall(ISDestroy(&iscol_o));
3319       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3320 
3321     } else PetscCheck(BsubN >= n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3322 
3323     PetscCall(PetscFree(garray));
3324     *submat = M;
3325 
3326     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3327     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3328     PetscCall(ISDestroy(&isrow_d));
3329 
3330     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3331     PetscCall(ISDestroy(&iscol_d));
3332 
3333     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3334     PetscCall(ISDestroy(&iscol_o));
3335   }
3336   PetscFunctionReturn(PETSC_SUCCESS);
3337 }
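/*
  Illustrative usage sketch (editorial addition, not part of the original source): extracting a
  parallel submatrix through the public MatCreateSubMatrix() interface. When, on every process, the
  indices in isrow and iscol fall within that process's ownership ranges, the routine above is
  selected, and the index sets it composes on the submatrix (isrow_d, iscol_d, iscol_o) enable the
  cheaper MAT_REUSE_MATRIX path on later calls with the same index sets.

    Mat sub;

    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
    // ... the values of mat change, the sparsity pattern and index sets stay the same ...
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_REUSE_MATRIX, &sub));
    PetscCall(MatDestroy(&sub));
*/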
3338 
3339 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3340 {
3341   IS        iscol_local = NULL, isrow_d;
3342   PetscInt  csize;
3343   PetscInt  n, i, j, start, end;
3344   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3345   MPI_Comm  comm;
3346 
3347   PetscFunctionBegin;
3348   /* If isrow has same processor distribution as mat,
3349      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3350   if (call == MAT_REUSE_MATRIX) {
3351     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3352     if (isrow_d) {
3353       sameRowDist  = PETSC_TRUE;
3354       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3355     } else {
3356       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3357       if (iscol_local) {
3358         sameRowDist  = PETSC_TRUE;
3359         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3360       }
3361     }
3362   } else {
3363     /* Check if isrow has same processor distribution as mat */
3364     sameDist[0] = PETSC_FALSE;
3365     PetscCall(ISGetLocalSize(isrow, &n));
3366     if (!n) {
3367       sameDist[0] = PETSC_TRUE;
3368     } else {
3369       PetscCall(ISGetMinMax(isrow, &i, &j));
3370       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3371       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3372     }
3373 
3374     /* Check if iscol has same processor distribution as mat */
3375     sameDist[1] = PETSC_FALSE;
3376     PetscCall(ISGetLocalSize(iscol, &n));
3377     if (!n) {
3378       sameDist[1] = PETSC_TRUE;
3379     } else {
3380       PetscCall(ISGetMinMax(iscol, &i, &j));
3381       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3382       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3383     }
3384 
3385     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3386     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPI_C_BOOL, MPI_LAND, comm));
3387     sameRowDist = tsameDist[0];
3388   }
3389 
3390   if (sameRowDist) {
3391     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3392       /* isrow and iscol have same processor distribution as mat */
3393       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3394       PetscFunctionReturn(PETSC_SUCCESS);
3395     } else { /* sameRowDist */
3396       /* isrow has same processor distribution as mat */
3397       if (call == MAT_INITIAL_MATRIX) {
3398         PetscBool sorted;
3399         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3400         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3401         PetscCall(ISGetSize(iscol, &i));
3402         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3403 
3404         PetscCall(ISSorted(iscol_local, &sorted));
3405         if (sorted) {
3406           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3407           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3408           PetscFunctionReturn(PETSC_SUCCESS);
3409         }
3410       } else { /* call == MAT_REUSE_MATRIX */
3411         IS iscol_sub;
3412         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3413         if (iscol_sub) {
3414           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3415           PetscFunctionReturn(PETSC_SUCCESS);
3416         }
3417       }
3418     }
3419   }
3420 
3421   /* General case: iscol -> iscol_local which has global size of iscol */
3422   if (call == MAT_REUSE_MATRIX) {
3423     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3424     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3425   } else {
3426     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3427   }
3428 
3429   PetscCall(ISGetLocalSize(iscol, &csize));
3430   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3431 
3432   if (call == MAT_INITIAL_MATRIX) {
3433     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3434     PetscCall(ISDestroy(&iscol_local));
3435   }
3436   PetscFunctionReturn(PETSC_SUCCESS);
3437 }
3438 
3439 /*@C
3440   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3441   and "off-diagonal" part of the matrix in CSR format.
3442 
3443   Collective
3444 
3445   Input Parameters:
3446 + comm   - MPI communicator
3447 . M      - the global row size
3448 . N      - the global column size
3449 . A      - "diagonal" portion of matrix
3450 . B      - the "off-diagonal" portion of the matrix; if garray is `NULL`, B should use global column ids and have N columns, otherwise B should use local column ids and have as many columns as entries in garray
3451 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter.
3452 
3453   Output Parameter:
3454 . mat - the matrix, with input `A` as its local diagonal matrix
3455 
3456   Level: advanced
3457 
3458   Notes:
3459   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3460 
3461   `A` and `B` become part of the output `mat`; the user cannot use `A` and `B` anymore.
3462 
3463   If `garray` is `NULL`, `B` will be compacted to use local indices, which changes `B`'s sparsity pattern (nonzerostate). If `B` is a device matrix, its copy on the device must also be updated;
3464   this is done by increasing `B`'s nonzerostate. The next time `B` is used on the device, device matrix types detect this change (see the internal routines `MatSeqAIJCUSPARSECopyToGPU()` and
3465   `MatAssemblyEnd_SeqAIJKokkos()`) and simply destroy and then recreate the device copy of `B`. This is not optimal, but it is easy to implement. To avoid this overhead, compute `garray`
3466   yourself; see the algorithms in the private function `MatSetUpMultiply_MPIAIJ()`.
3467 
3468   The `NULL`-ness of `garray` doesn't need to be collective, in other words, `garray` can be `NULL` on some processes while not on others.
3469 
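  Example Usage (a minimal sketch; `Asub` and `Bsub` are assumed to be `MATSEQAIJ` matrices already built by the caller,
  with `Bsub` using global column ids because `garray` is passed as `NULL` here):
.vb
  Mat C;

  PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M, N, Asub, Bsub, NULL, &C));
  // Asub and Bsub are now owned by C and must not be used or destroyed by the caller
.ve
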
3470 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3471 @*/
3472 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3473 {
3474   PetscInt    m, n;
3475   MatType     mpi_mat_type;
3476   Mat_MPIAIJ *mpiaij;
3477   Mat         C;
3478 
3479   PetscFunctionBegin;
3480   PetscCall(MatCreate(comm, &C));
3481   PetscCall(MatGetSize(A, &m, &n));
3482   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3483   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3484 
3485   PetscCall(MatSetSizes(C, m, n, M, N));
3486   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3487   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3488   PetscCall(MatSetType(C, mpi_mat_type));
3489   if (!garray) {
3490     const PetscScalar *ba;
3491 
3492     B->nonzerostate++;
3493     PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */
3494     PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
3495   }
3496 
3497   PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs));
3498   PetscCall(PetscLayoutSetUp(C->rmap));
3499   PetscCall(PetscLayoutSetUp(C->cmap));
3500 
3501   mpiaij              = (Mat_MPIAIJ *)C->data;
3502   mpiaij->A           = A;
3503   mpiaij->B           = B;
3504   mpiaij->garray      = garray;
3505   C->preallocated     = PETSC_TRUE;
3506   C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */
3507 
3508   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3509   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3510   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to those of A and B, and
3511    also compacts mpiaij->B (if garray is NULL), reducing its column ids and size
3512    */
3513   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3514   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3515   PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3516   *mat = C;
3517   PetscFunctionReturn(PETSC_SUCCESS);
3518 }
3519 
3520 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3521 
3522 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3523 {
3524   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3525   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3526   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3527   Mat             M, Msub, B = a->B;
3528   MatScalar      *aa;
3529   Mat_SeqAIJ     *aij;
3530   PetscInt       *garray = a->garray, *colsub, Ncols;
3531   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3532   IS              iscol_sub, iscmap;
3533   const PetscInt *is_idx, *cmap;
3534   PetscBool       allcolumns = PETSC_FALSE;
3535   MPI_Comm        comm;
3536 
3537   PetscFunctionBegin;
3538   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3539   if (call == MAT_REUSE_MATRIX) {
3540     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3541     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3542     PetscCall(ISGetLocalSize(iscol_sub, &count));
3543 
3544     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3545     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3546 
3547     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3548     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3549 
3550     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3551 
3552   } else { /* call == MAT_INITIAL_MATRIX) */
3553     PetscBool flg;
3554 
3555     PetscCall(ISGetLocalSize(iscol, &n));
3556     PetscCall(ISGetSize(iscol, &Ncols));
3557 
3558     /* (1) iscol -> nonscalable iscol_local */
3559     /* Check for special case: each processor gets entire matrix columns */
3560     PetscCall(ISIdentity(iscol_local, &flg));
3561     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3562     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3563     if (allcolumns) {
3564       iscol_sub = iscol_local;
3565       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3566       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3567 
3568     } else {
3569       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3570       PetscInt *idx, *cmap1, k;
3571       PetscCall(PetscMalloc1(Ncols, &idx));
3572       PetscCall(PetscMalloc1(Ncols, &cmap1));
3573       PetscCall(ISGetIndices(iscol_local, &is_idx));
3574       count = 0;
3575       k     = 0;
3576       for (i = 0; i < Ncols; i++) {
3577         j = is_idx[i];
3578         if (j >= cstart && j < cend) {
3579           /* diagonal part of mat */
3580           idx[count]     = j;
3581           cmap1[count++] = i; /* column index in submat */
3582         } else if (Bn) {
3583           /* off-diagonal part of mat */
3584           if (j == garray[k]) {
3585             idx[count]     = j;
3586             cmap1[count++] = i; /* column index in submat */
3587           } else if (j > garray[k]) {
3588             while (j > garray[k] && k < Bn - 1) k++;
3589             if (j == garray[k]) {
3590               idx[count]     = j;
3591               cmap1[count++] = i; /* column index in submat */
3592             }
3593           }
3594         }
3595       }
3596       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3597 
3598       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3599       PetscCall(ISGetBlockSize(iscol, &cbs));
3600       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3601 
3602       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3603     }
3604 
3605     /* (3) Create sequential Msub */
3606     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3607   }
3608 
3609   PetscCall(ISGetLocalSize(iscol_sub, &count));
3610   aij = (Mat_SeqAIJ *)Msub->data;
3611   ii  = aij->i;
3612   PetscCall(ISGetIndices(iscmap, &cmap));
3613 
3614   /*
3615       m - number of local rows
3616       Ncols - number of columns (same on all processors)
3617       rstart - first row in new global matrix generated
3618   */
3619   PetscCall(MatGetSize(Msub, &m, NULL));
3620 
3621   if (call == MAT_INITIAL_MATRIX) {
3622     /* (4) Create parallel newmat */
3623     PetscMPIInt rank, size;
3624     PetscInt    csize;
3625 
3626     PetscCallMPI(MPI_Comm_size(comm, &size));
3627     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3628 
3629     /*
3630         Determine the number of non-zeros in the diagonal and off-diagonal
3631         portions of the matrix in order to do correct preallocation
3632     */
3633 
3634     /* first get start and end of "diagonal" columns */
3635     PetscCall(ISGetLocalSize(iscol, &csize));
3636     if (csize == PETSC_DECIDE) {
3637       PetscCall(ISGetSize(isrow, &mglobal));
3638       if (mglobal == Ncols) { /* square matrix */
3639         nlocal = m;
3640       } else {
3641         nlocal = Ncols / size + ((Ncols % size) > rank);
3642       }
3643     } else {
3644       nlocal = csize;
3645     }
3646     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3647     rstart = rend - nlocal;
3648     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3649 
3650     /* next, compute all the lengths */
3651     jj = aij->j;
3652     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3653     olens = dlens + m;
3654     for (i = 0; i < m; i++) {
3655       jend = ii[i + 1] - ii[i];
3656       olen = 0;
3657       dlen = 0;
3658       for (j = 0; j < jend; j++) {
3659         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3660         else dlen++;
3661         jj++;
3662       }
3663       olens[i] = olen;
3664       dlens[i] = dlen;
3665     }
3666 
3667     PetscCall(ISGetBlockSize(isrow, &bs));
3668     PetscCall(ISGetBlockSize(iscol, &cbs));
3669 
3670     PetscCall(MatCreate(comm, &M));
3671     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3672     PetscCall(MatSetBlockSizes(M, bs, cbs));
3673     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3674     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3675     PetscCall(PetscFree(dlens));
3676 
3677   } else { /* call == MAT_REUSE_MATRIX */
3678     M = *newmat;
3679     PetscCall(MatGetLocalSize(M, &i, NULL));
3680     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3681     PetscCall(MatZeroEntries(M));
3682     /*
3683          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3684        rather than the slower MatSetValues().
3685     */
3686     M->was_assembled = PETSC_TRUE;
3687     M->assembled     = PETSC_FALSE;
3688   }
3689 
3690   /* (5) Set values of Msub to *newmat */
3691   PetscCall(PetscMalloc1(count, &colsub));
3692   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3693 
3694   jj = aij->j;
3695   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3696   for (i = 0; i < m; i++) {
3697     row = rstart + i;
3698     nz  = ii[i + 1] - ii[i];
3699     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3700     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3701     jj += nz;
3702     aa += nz;
3703   }
3704   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3705   PetscCall(ISRestoreIndices(iscmap, &cmap));
3706 
3707   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3708   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3709 
3710   PetscCall(PetscFree(colsub));
3711 
3712   /* save Msub, iscol_sub and iscmap used on this process for the next request */
3713   if (call == MAT_INITIAL_MATRIX) {
3714     *newmat = M;
3715     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3716     PetscCall(MatDestroy(&Msub));
3717 
3718     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3719     PetscCall(ISDestroy(&iscol_sub));
3720 
3721     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3722     PetscCall(ISDestroy(&iscmap));
3723 
3724     if (iscol_local) {
3725       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3726       PetscCall(ISDestroy(&iscol_local));
3727     }
3728   }
3729   PetscFunctionReturn(PETSC_SUCCESS);
3730 }
3731 
3732 /*
3733     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3734   then the end result by concatenating the local matrices.
3735   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3736 
3737   This requires a sequential iscol with all indices.
3738 */
3739 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3740 {
3741   PetscMPIInt rank, size;
3742   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3743   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3744   Mat         M, Mreuse;
3745   MatScalar  *aa, *vwork;
3746   MPI_Comm    comm;
3747   Mat_SeqAIJ *aij;
3748   PetscBool   colflag, allcolumns = PETSC_FALSE;
3749 
3750   PetscFunctionBegin;
3751   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3752   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3753   PetscCallMPI(MPI_Comm_size(comm, &size));
3754 
3755   /* Check for special case: each processor gets entire matrix columns */
3756   PetscCall(ISIdentity(iscol, &colflag));
3757   PetscCall(ISGetLocalSize(iscol, &n));
3758   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3759   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3760 
3761   if (call == MAT_REUSE_MATRIX) {
3762     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3763     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3764     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3765   } else {
3766     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3767   }
3768 
3769   /*
3770       m - number of local rows
3771       n - number of columns (same on all processors)
3772       rstart - first row in new global matrix generated
3773   */
3774   PetscCall(MatGetSize(Mreuse, &m, &n));
3775   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3776   if (call == MAT_INITIAL_MATRIX) {
3777     aij = (Mat_SeqAIJ *)Mreuse->data;
3778     ii  = aij->i;
3779     jj  = aij->j;
3780 
3781     /*
3782         Determine the number of non-zeros in the diagonal and off-diagonal
3783         portions of the matrix in order to do correct preallocation
3784     */
3785 
3786     /* first get start and end of "diagonal" columns */
3787     if (csize == PETSC_DECIDE) {
3788       PetscCall(ISGetSize(isrow, &mglobal));
3789       if (mglobal == n) { /* square matrix */
3790         nlocal = m;
3791       } else {
3792         nlocal = n / size + ((n % size) > rank);
3793       }
3794     } else {
3795       nlocal = csize;
3796     }
3797     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3798     rstart = rend - nlocal;
3799     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3800 
3801     /* next, compute all the lengths */
3802     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3803     olens = dlens + m;
3804     for (i = 0; i < m; i++) {
3805       jend = ii[i + 1] - ii[i];
3806       olen = 0;
3807       dlen = 0;
3808       for (j = 0; j < jend; j++) {
3809         if (*jj < rstart || *jj >= rend) olen++;
3810         else dlen++;
3811         jj++;
3812       }
3813       olens[i] = olen;
3814       dlens[i] = dlen;
3815     }
3816     PetscCall(MatCreate(comm, &M));
3817     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3818     PetscCall(MatSetBlockSizes(M, bs, cbs));
3819     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3820     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3821     PetscCall(PetscFree(dlens));
3822   } else {
3823     PetscInt ml, nl;
3824 
3825     M = *newmat;
3826     PetscCall(MatGetLocalSize(M, &ml, &nl));
3827     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3828     PetscCall(MatZeroEntries(M));
3829     /*
3830          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3831        rather than the slower MatSetValues().
3832     */
3833     M->was_assembled = PETSC_TRUE;
3834     M->assembled     = PETSC_FALSE;
3835   }
3836   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3837   aij = (Mat_SeqAIJ *)Mreuse->data;
3838   ii  = aij->i;
3839   jj  = aij->j;
3840 
3841   /* trigger copy to CPU if needed */
3842   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3843   for (i = 0; i < m; i++) {
3844     row   = rstart + i;
3845     nz    = ii[i + 1] - ii[i];
3846     cwork = jj;
3847     jj    = PetscSafePointerPlusOffset(jj, nz);
3848     vwork = aa;
3849     aa    = PetscSafePointerPlusOffset(aa, nz);
3850     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3851   }
3852   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3853 
3854   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3855   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3856   *newmat = M;
3857 
3858   /* save the submatrix used on this process for the next request */
3859   if (call == MAT_INITIAL_MATRIX) {
3860     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3861     PetscCall(MatDestroy(&Mreuse));
3862   }
3863   PetscFunctionReturn(PETSC_SUCCESS);
3864 }
3865 
3866 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3867 {
3868   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3869   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3870   const PetscInt *JJ;
3871   PetscBool       nooffprocentries;
3872   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3873 
3874   PetscFunctionBegin;
3875   PetscCall(PetscLayoutSetUp(B->rmap));
3876   PetscCall(PetscLayoutSetUp(B->cmap));
3877   m       = B->rmap->n;
3878   cstart  = B->cmap->rstart;
3879   cend    = B->cmap->rend;
3880   rstart  = B->rmap->rstart;
3881   irstart = Ii[0];
3882 
3883   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3884 
3885   if (PetscDefined(USE_DEBUG)) {
3886     for (i = 0; i < m; i++) {
3887       nnz = Ii[i + 1] - Ii[i];
3888       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3889       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3890       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3891       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3892     }
3893   }
3894 
3895   for (i = 0; i < m; i++) {
3896     nnz     = Ii[i + 1] - Ii[i];
3897     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3898     nnz_max = PetscMax(nnz_max, nnz);
3899     d       = 0;
3900     for (j = 0; j < nnz; j++) {
3901       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3902     }
3903     d_nnz[i] = d;
3904     o_nnz[i] = nnz - d;
3905   }
3906   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3907   PetscCall(PetscFree2(d_nnz, o_nnz));
3908 
3909   for (i = 0; i < m; i++) {
3910     ii = i + rstart;
3911     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3912   }
3913   nooffprocentries    = B->nooffprocentries;
3914   B->nooffprocentries = PETSC_TRUE;
3915   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3916   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3917   B->nooffprocentries = nooffprocentries;
3918 
3919   /* count number of entries below block diagonal */
3920   PetscCall(PetscFree(Aij->ld));
3921   PetscCall(PetscCalloc1(m, &ld));
3922   Aij->ld = ld;
3923   for (i = 0; i < m; i++) {
3924     nnz = Ii[i + 1] - Ii[i];
3925     j   = 0;
3926     while (j < nnz && J[j] < cstart) j++;
3927     ld[i] = j;
3928     if (J) J += nnz;
3929   }
3930 
3931   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3932   PetscFunctionReturn(PETSC_SUCCESS);
3933 }
3934 
3935 /*@
3936   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3937   (the default parallel PETSc format).
3938 
3939   Collective
3940 
3941   Input Parameters:
3942 + B - the matrix
3943 . i - the indices into `j` for the start of each local row (indices start with zero)
3944 . j - the column indices for each local row (indices start with zero)
3945 - v - optional values in the matrix
3946 
3947   Level: developer
3948 
3949   Notes:
3950   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3951   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3952   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3953 
3954   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3955 
3956   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3957 
3958   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3959 
3960   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3961   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3962 
3963   The format used for the sparse matrix input is equivalent to a
3964   row-major ordering, i.e., for the following matrix, the input data expected is
3965   as shown
3966 .vb
3967         1 0 0
3968         2 0 3     P0
3969        -------
3970         4 5 6     P1
3971 
3972      Process0 [P0] rows_owned=[0,1]
3973         i =  {0,1,3}  [size = nrow+1  = 2+1]
3974         j =  {0,0,2}  [size = 3]
3975         v =  {1,2,3}  [size = 3]
3976 
3977      Process1 [P1] rows_owned=[2]
3978         i =  {0,3}    [size = nrow+1  = 1+1]
3979         j =  {0,1,2}  [size = 3]
3980         v =  {4,5,6}  [size = 3]
3981 .ve
3982 
3983 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3984           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
3985 @*/
3986 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3987 {
3988   PetscFunctionBegin;
3989   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
3990   PetscFunctionReturn(PETSC_SUCCESS);
3991 }
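
/*
  Illustrative sketch (not part of the PETSc sources): a minimal caller of MatMPIAIJSetPreallocationCSR()
  following the two-process example in the manual page above. The helper name and the hard-coded CSR data
  are hypothetical, and exactly two MPI ranks are assumed.

    #include <petscmat.h>

    static PetscErrorCode BuildExampleCSR(MPI_Comm comm, Mat *A)
    {
      PetscMPIInt       rank;
      // rank 0 owns rows {0,1}, rank 1 owns row {2} of the 3x3 example matrix
      const PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2};
      const PetscScalar v0[] = {1, 2, 3};
      const PetscInt    i1[] = {0, 3}, j1[] = {0, 1, 2};
      const PetscScalar v1[] = {4, 5, 6};

      PetscFunctionBegin;
      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      PetscCall(MatCreate(comm, A));
      PetscCall(MatSetSizes(*A, rank ? 1 : 2, PETSC_DECIDE, 3, 3));
      PetscCall(MatSetType(*A, MATMPIAIJ));
      // copies the CSR arrays, preallocates, inserts the values, and assembles the matrix
      PetscCall(MatMPIAIJSetPreallocationCSR(*A, rank ? i1 : i0, rank ? j1 : j0, rank ? v1 : v0));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
*/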
3992 
3993 /*@
3994   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3995   (the default parallel PETSc format).  For good matrix assembly performance
3996   the user should preallocate the matrix storage by setting the parameters
3997   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
3998 
3999   Collective
4000 
4001   Input Parameters:
4002 + B     - the matrix
4003 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4004            (same value is used for all local rows)
4005 . d_nnz - array containing the number of nonzeros in the various rows of the
4006            DIAGONAL portion of the local submatrix (possibly different for each row)
4007            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4008            The size of this array is equal to the number of local rows, i.e 'm'.
4009            For matrices that will be factored, you must leave room for (and set)
4010            the diagonal entry even if it is zero.
4011 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4012            submatrix (same value is used for all local rows).
4013 - o_nnz - array containing the number of nonzeros in the various rows of the
4014            OFF-DIAGONAL portion of the local submatrix (possibly different for
4015            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4016            structure. The size of this array is equal to the number
4017            of local rows, i.e 'm'.
4018 
4019   Example Usage:
4020   Consider the following 8x8 matrix with 34 non-zero values, that is
4021   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4022   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4023   as follows
4024 
4025 .vb
4026             1  2  0  |  0  3  0  |  0  4
4027     Proc0   0  5  6  |  7  0  0  |  8  0
4028             9  0 10  | 11  0  0  | 12  0
4029     -------------------------------------
4030            13  0 14  | 15 16 17  |  0  0
4031     Proc1   0 18  0  | 19 20 21  |  0  0
4032             0  0  0  | 22 23  0  | 24  0
4033     -------------------------------------
4034     Proc2  25 26 27  |  0  0 28  | 29  0
4035            30  0  0  | 31 32 33  |  0 34
4036 .ve
4037 
4038   This can be represented as a collection of submatrices as
4039 .vb
4040       A B C
4041       D E F
4042       G H I
4043 .ve
4044 
4045   Where the submatrices A,B,C are owned by proc0, D,E,F are
4046   owned by proc1, G,H,I are owned by proc2.
4047 
4048   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4049   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4050   The 'M','N' parameters are 8,8, and have the same values on all procs.
4051 
4052   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4053   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4054   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4055   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4056   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4057   matrix, and [DF] as another `MATSEQAIJ` matrix.
4058 
4059   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4060   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4061   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4062   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4063   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4064   In this case, the values of `d_nz`, `o_nz` are
4065 .vb
4066      proc0  dnz = 2, o_nz = 2
4067      proc1  dnz = 3, o_nz = 2
4068      proc2  dnz = 1, o_nz = 4
4069 .ve
4070   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4071   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4072   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4073   34 values.
4074 
4075   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4076   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4077   In the above case the values for `d_nnz`, `o_nnz` are
4078 .vb
4079      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4080      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4081      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4082 .ve
4083   Here the space allocated is the sum of all the above values, i.e., 34, and
4084   hence the preallocation is perfect.
4085 
4086   Level: intermediate
4087 
4088   Notes:
4089   If the *_nnz parameter is given then the *_nz parameter is ignored
4090 
4091   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4092   storage.  The stored row and column indices begin with zero.
4093   See [Sparse Matrices](sec_matsparse) for details.
4094 
4095   The parallel matrix is partitioned such that the first m0 rows belong to
4096   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4097   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4098 
4099   The DIAGONAL portion of the local submatrix of a processor can be defined
4100   as the submatrix obtained by extracting the part corresponding to
4101   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4102   first row that belongs to the processor, r2 is the last row belonging to
4103   this processor, and c1-c2 is the range of indices of the local part of a
4104   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4105   common case of a square matrix, the row and column ranges are the same and
4106   the DIAGONAL part is also square. The remaining portion of the local
4107   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4108 
4109   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4110 
4111   You can call `MatGetInfo()` to get information on how effective the preallocation was,
4112   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4113   You can also run with the option `-info` and look for messages with the string
4114   malloc in them to see if additional memory allocation was needed.
4115 
4116 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4117           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4118 @*/
4119 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4120 {
4121   PetscFunctionBegin;
4122   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4123   PetscValidType(B, 1);
4124   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4125   PetscFunctionReturn(PETSC_SUCCESS);
4126 }
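
/*
  Illustrative sketch (not part of the PETSc sources): the preallocation call the rank owning the first
  three rows of the 8x8 example above might make. The per-row counts are copied from the d_nnz/o_nnz table
  in the manual page; the variable names are purely for illustration.

    Mat            A;
    const PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2}; // per-row counts for proc0 of the 8x8 example

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, 3, 3, 8, 8));                   // proc0 owns 3 rows and 3 "diagonal" columns
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz));
    // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use the matrix, MatDestroy(&A) ...
*/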
4127 
4128 /*@
4129   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4130   CSR format.
4131 
4132   Collective
4133 
4134   Input Parameters:
4135 + comm - MPI communicator
4136 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4137 . n    - This value should be the same as the local size used in creating the
4138          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4139          calculated if `N` is given) For square matrices n is almost always `m`.
4140 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4141 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4142 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4143 . j    - global column indices
4144 - a    - optional matrix values
4145 
4146   Output Parameter:
4147 . mat - the matrix
4148 
4149   Level: intermediate
4150 
4151   Notes:
4152   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4153   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4154   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4155 
4156   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4157 
4158   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4159 
4160   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4161   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4162 
4163   The format which is used for the sparse matrix input, is equivalent to a
4164   row-major ordering, i.e., for the following matrix, the input data expected is
4165   as shown
4166 .vb
4167         1 0 0
4168         2 0 3     P0
4169        -------
4170         4 5 6     P1
4171 
4172      Process0 [P0] rows_owned=[0,1]
4173         i =  {0,1,3}  [size = nrow+1  = 2+1]
4174         j =  {0,0,2}  [size = 3]
4175         v =  {1,2,3}  [size = 3]
4176 
4177      Process1 [P1] rows_owned=[2]
4178         i =  {0,3}    [size = nrow+1  = 1+1]
4179         j =  {0,1,2}  [size = 3]
4180         v =  {4,5,6}  [size = 3]
4181 .ve
4182 
4183 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4184           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4185 @*/
4186 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4187 {
4188   PetscFunctionBegin;
4189   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4190   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4191   PetscCall(MatCreate(comm, mat));
4192   PetscCall(MatSetSizes(*mat, m, n, M, N));
4193   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4194   PetscCall(MatSetType(*mat, MATMPIAIJ));
4195   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4196   PetscFunctionReturn(PETSC_SUCCESS);
4197 }
4198 
4199 /*@
4200   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4201   CSR format. Only the numerical values are updated; the other arrays must be identical to those passed
4202   to `MatCreateMPIAIJWithArrays()`
4203 
4204   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4205 
4206   Collective
4207 
4208   Input Parameters:
4209 + mat - the matrix
4210 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4211 . n   - This value should be the same as the local size used in creating the
4212        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4213        calculated if N is given) For square matrices n is almost always m.
4214 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4215 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4216 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4217 . J   - column indices
4218 - v   - matrix values
4219 
4220   Level: deprecated
4221 
4222 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4223           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4224 @*/
4225 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4226 {
4227   PetscInt        nnz, i;
4228   PetscBool       nooffprocentries;
4229   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4230   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4231   PetscScalar    *ad, *ao;
4232   PetscInt        ldi, Iii, md;
4233   const PetscInt *Adi = Ad->i;
4234   PetscInt       *ld  = Aij->ld;
4235 
4236   PetscFunctionBegin;
4237   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4238   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4239   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4240   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4241 
4242   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4243   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4244 
4245   for (i = 0; i < m; i++) {
4246     if (PetscDefined(USE_DEBUG)) {
4247       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4248         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4249         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4250       }
4251     }
4252     nnz = Ii[i + 1] - Ii[i];
4253     Iii = Ii[i];
4254     ldi = ld[i];
4255     md  = Adi[i + 1] - Adi[i];
4256     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4257     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4258     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4259     ad += md;
4260     ao += nnz - md;
4261   }
4262   nooffprocentries      = mat->nooffprocentries;
4263   mat->nooffprocentries = PETSC_TRUE;
4264   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4265   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4266   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4267   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4268   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4269   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4270   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4271   mat->nooffprocentries = nooffprocentries;
4272   PetscFunctionReturn(PETSC_SUCCESS);
4273 }
4274 
4275 /*@
4276   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4277 
4278   Collective
4279 
4280   Input Parameters:
4281 + mat - the matrix
4282 - v   - matrix values, stored by row
4283 
4284   Level: intermediate
4285 
4286   Notes:
4287   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4288 
4289   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4290 
4291 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4292           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4293 @*/
4294 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4295 {
4296   PetscInt        nnz, i, m;
4297   PetscBool       nooffprocentries;
4298   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4299   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4300   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4301   PetscScalar    *ad, *ao;
4302   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4303   PetscInt        ldi, Iii, md;
4304   PetscInt       *ld = Aij->ld;
4305 
4306   PetscFunctionBegin;
4307   m = mat->rmap->n;
4308 
4309   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4310   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4311   Iii = 0;
4312   for (i = 0; i < m; i++) {
4313     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4314     ldi = ld[i];
4315     md  = Adi[i + 1] - Adi[i];
4316     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4317     ad += md;
4318     if (ao) {
4319       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4320       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4321       ao += nnz - md;
4322     }
4323     Iii += nnz;
4324   }
4325   nooffprocentries      = mat->nooffprocentries;
4326   mat->nooffprocentries = PETSC_TRUE;
4327   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4328   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4329   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4330   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4331   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4332   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4333   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4334   mat->nooffprocentries = nooffprocentries;
4335   PetscFunctionReturn(PETSC_SUCCESS);
4336 }
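
/*
  Illustrative sketch (not part of the PETSc sources): creating a matrix with MatCreateMPIAIJWithArrays()
  (column indices sorted within each row) and later refreshing only its numerical values with
  MatUpdateMPIAIJWithArray(). The single-rank CSR data below is hypothetical example data.

    Mat               A;
    const PetscInt    ia[]   = {0, 2, 3}, ja[] = {0, 1, 1}; // 2x2 CSR, columns sorted within each row
    const PetscScalar va[]   = {1, 2, 3};
    const PetscScalar vnew[] = {10, 20, 30};                // same sparsity pattern, new values

    PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_SELF, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, ia, ja, va, &A));
    PetscCall(MatUpdateMPIAIJWithArray(A, vnew));           // values stored by row, same layout as va
    PetscCall(MatDestroy(&A));
*/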
4337 
4338 /*@
4339   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4340   (the default parallel PETSc format).  For good matrix assembly performance
4341   the user should preallocate the matrix storage by setting the parameters
4342   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4343 
4344   Collective
4345 
4346   Input Parameters:
4347 + comm  - MPI communicator
4348 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4349           This value should be the same as the local size used in creating the
4350           y vector for the matrix-vector product y = Ax.
4351 . n     - This value should be the same as the local size used in creating the
4352           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4353           calculated if N is given) For square matrices n is almost always m.
4354 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4355 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4356 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4357           (same value is used for all local rows)
4358 . d_nnz - array containing the number of nonzeros in the various rows of the
4359           DIAGONAL portion of the local submatrix (possibly different for each row)
4360           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4361           The size of this array is equal to the number of local rows, i.e 'm'.
4362 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4363           submatrix (same value is used for all local rows).
4364 - o_nnz - array containing the number of nonzeros in the various rows of the
4365           OFF-DIAGONAL portion of the local submatrix (possibly different for
4366           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4367           structure. The size of this array is equal to the number
4368           of local rows, i.e 'm'.
4369 
4370   Output Parameter:
4371 . A - the matrix
4372 
4373   Options Database Keys:
4374 + -mat_no_inode                     - Do not use inodes
4375 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4376 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4377                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4378                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4379 
4380   Level: intermediate
4381 
4382   Notes:
4383   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4384   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4385   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4386 
4387   If the *_nnz parameter is given then the *_nz parameter is ignored
4388 
4389   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4390   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4391   storage requirements for this matrix.
4392 
4393   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4394   processor then it must be used on all processors that share the object for
4395   that argument.
4396 
4397   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4398   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4399 
4400   The user MUST specify either the local or global matrix dimensions
4401   (possibly both).
4402 
4403   The parallel matrix is partitioned across processors such that the
4404   first `m0` rows belong to process 0, the next `m1` rows belong to
4405   process 1, the next `m2` rows belong to process 2, etc., where
4406   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4407   the values corresponding to an [m x N] submatrix.
4408 
4409   The columns are logically partitioned with the first n0 columns belonging
4410   to the 0th partition, the next n1 columns belonging to the next
4411   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4412 
4413   The DIAGONAL portion of the local submatrix on any given processor
4414   is the submatrix corresponding to the rows and columns m,n
4415   corresponding to the given processor, i.e., the diagonal matrix on
4416   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4417   etc. The remaining portion of the local submatrix [m x (N-n)]
4418   constitutes the OFF-DIAGONAL portion. The example below better
4419   illustrates this concept. The two matrices, the DIAGONAL portion and
4420   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4421 
4422   For a square global matrix we define each processor's diagonal portion
4423   to be its local rows and the corresponding columns (a square submatrix);
4424   each processor's off-diagonal portion encompasses the remainder of the
4425   local matrix (a rectangular submatrix).
4426 
4427   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4428 
4429   When calling this routine with a single process communicator, a matrix of
4430   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4431   type of communicator, use the construction mechanism
4432 .vb
4433   MatCreate(..., &A);
4434   MatSetType(A, MATMPIAIJ);
4435   MatSetSizes(A, m, n, M, N);
4436   MatMPIAIJSetPreallocation(A, ...);
4437 .ve
4438 
4439   By default, this format uses inodes (identical nodes) when possible.
4440   We search for consecutive rows with the same nonzero structure, thereby
4441   reusing matrix information to achieve increased efficiency.
4442 
4443   Example Usage:
4444   Consider the following 8x8 matrix with 34 non-zero values, that is
4445   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4446   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4447   as follows
4448 
4449 .vb
4450             1  2  0  |  0  3  0  |  0  4
4451     Proc0   0  5  6  |  7  0  0  |  8  0
4452             9  0 10  | 11  0  0  | 12  0
4453     -------------------------------------
4454            13  0 14  | 15 16 17  |  0  0
4455     Proc1   0 18  0  | 19 20 21  |  0  0
4456             0  0  0  | 22 23  0  | 24  0
4457     -------------------------------------
4458     Proc2  25 26 27  |  0  0 28  | 29  0
4459            30  0  0  | 31 32 33  |  0 34
4460 .ve
4461 
4462   This can be represented as a collection of submatrices as
4463 
4464 .vb
4465       A B C
4466       D E F
4467       G H I
4468 .ve
4469 
4470   Where the submatrices A,B,C are owned by proc0, D,E,F are
4471   owned by proc1, G,H,I are owned by proc2.
4472 
4473   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4474   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4475   The 'M','N' parameters are 8,8, and have the same values on all procs.
4476 
4477   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4478   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4479   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4480   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4481   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4482   matrix, and [DF] as another `MATSEQAIJ` matrix.
4483 
4484   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4485   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4486   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4487   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4488   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4489   In this case, the values of `d_nz`,`o_nz` are
4490 .vb
4491      proc0  dnz = 2, o_nz = 2
4492      proc1  dnz = 3, o_nz = 2
4493      proc2  dnz = 1, o_nz = 4
4494 .ve
4495   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4496   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4497   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4498   34 values.
4499 
4500   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4501   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4502   In the above case the values for `d_nnz`, `o_nnz` are
4503 .vb
4504      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4505      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4506      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4507 .ve
4508   Here the space allocated is the sum of all the above values, i.e., 34, and
4509   hence the preallocation is perfect.
4510 
4511 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4512           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4513           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4514 @*/
4515 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4516 {
4517   PetscMPIInt size;
4518 
4519   PetscFunctionBegin;
4520   PetscCall(MatCreate(comm, A));
4521   PetscCall(MatSetSizes(*A, m, n, M, N));
4522   PetscCallMPI(MPI_Comm_size(comm, &size));
4523   if (size > 1) {
4524     PetscCall(MatSetType(*A, MATMPIAIJ));
4525     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4526   } else {
4527     PetscCall(MatSetType(*A, MATSEQAIJ));
4528     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4529   }
4530   PetscFunctionReturn(PETSC_SUCCESS);
4531 }
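
/*
  Illustrative sketch (not part of the PETSc sources): a direct MatCreateAIJ() call using the scalar
  d_nz/o_nz preallocation; the global size 100 and the per-row estimates are hypothetical. As the notes
  above recommend, new code would more typically use MatCreate()/MatSetType()/MatMPIAIJSetPreallocation().

    Mat A;
    PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 100,
                           5, NULL, 2, NULL, &A));          // roughly 5 diagonal and 2 off-diagonal nonzeros per row
    // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use the matrix, MatDestroy(&A) ...
*/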
4532 
4533 /*@C
4534   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4535 
4536   Not Collective
4537 
4538   Input Parameter:
4539 . A - The `MATMPIAIJ` matrix
4540 
4541   Output Parameters:
4542 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4543 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4544 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4545 
4546   Level: intermediate
4547 
4548   Note:
4549   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4550   in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4551   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4552   local column numbers to global column numbers in the original matrix.
4553 
4554 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4555 @*/
4556 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4557 {
4558   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4559   PetscBool   flg;
4560 
4561   PetscFunctionBegin;
4562   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4563   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4564   if (Ad) *Ad = a->A;
4565   if (Ao) *Ao = a->B;
4566   if (colmap) *colmap = a->garray;
4567   PetscFunctionReturn(PETSC_SUCCESS);
4568 }
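
/*
  Illustrative sketch (not part of the PETSc sources): walking the off-diagonal block returned by
  MatMPIAIJGetSeqAIJ() for an assembled MATMPIAIJ matrix A, translating its compressed local column
  numbers back to global columns through colmap.

    Mat             Ad, Ao;
    const PetscInt *colmap, *oi, *oj;
    PetscInt        nrows, rstart;
    PetscBool       done;

    PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    PetscCall(MatGetRowIJ(Ao, 0, PETSC_FALSE, PETSC_FALSE, &nrows, &oi, &oj, &done));
    if (done) {
      for (PetscInt i = 0; i < nrows; i++) {
        for (PetscInt k = oi[i]; k < oi[i + 1]; k++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "row %" PetscInt_FMT " couples to global column %" PetscInt_FMT "\n", rstart + i, colmap[oj[k]]));
      }
    }
    PetscCall(MatRestoreRowIJ(Ao, 0, PETSC_FALSE, PETSC_FALSE, &nrows, &oi, &oj, &done));
    // Ad, Ao, and colmap are borrowed references; do not destroy or free them
*/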
4569 
4570 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4571 {
4572   PetscInt     m, N, i, rstart, nnz, Ii;
4573   PetscInt    *indx;
4574   PetscScalar *values;
4575   MatType      rootType;
4576 
4577   PetscFunctionBegin;
4578   PetscCall(MatGetSize(inmat, &m, &N));
4579   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4580     PetscInt *dnz, *onz, sum, bs, cbs;
4581 
4582     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4583     /* Check sum(n) = N */
4584     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4585     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4586 
4587     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4588     rstart -= m;
4589 
4590     MatPreallocateBegin(comm, m, n, dnz, onz);
4591     for (i = 0; i < m; i++) {
4592       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4593       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4594       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4595     }
4596 
4597     PetscCall(MatCreate(comm, outmat));
4598     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4599     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4600     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4601     PetscCall(MatGetRootType_Private(inmat, &rootType));
4602     PetscCall(MatSetType(*outmat, rootType));
4603     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4604     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4605     MatPreallocateEnd(dnz, onz);
4606     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4607   }
4608 
4609   /* numeric phase */
4610   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4611   for (i = 0; i < m; i++) {
4612     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4613     Ii = i + rstart;
4614     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4615     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4616   }
4617   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4618   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4619   PetscFunctionReturn(PETSC_SUCCESS);
4620 }
4621 
4622 static PetscErrorCode MatMergeSeqsToMPIDestroy(PetscCtxRt data)
4623 {
4624   MatMergeSeqsToMPI *merge = *(MatMergeSeqsToMPI **)data;
4625 
4626   PetscFunctionBegin;
4627   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4628   PetscCall(PetscFree(merge->id_r));
4629   PetscCall(PetscFree(merge->len_s));
4630   PetscCall(PetscFree(merge->len_r));
4631   PetscCall(PetscFree(merge->bi));
4632   PetscCall(PetscFree(merge->bj));
4633   PetscCall(PetscFree(merge->buf_ri[0]));
4634   PetscCall(PetscFree(merge->buf_ri));
4635   PetscCall(PetscFree(merge->buf_rj[0]));
4636   PetscCall(PetscFree(merge->buf_rj));
4637   PetscCall(PetscFree(merge->coi));
4638   PetscCall(PetscFree(merge->coj));
4639   PetscCall(PetscFree(merge->owners_co));
4640   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4641   PetscCall(PetscFree(merge));
4642   PetscFunctionReturn(PETSC_SUCCESS);
4643 }
4644 
4645 #include <../src/mat/utils/freespace.h>
4646 #include <petscbt.h>
4647 
4648 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4649 {
4650   MPI_Comm           comm;
4651   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)seqmat->data;
4652   PetscMPIInt        size, rank, taga, *len_s;
4653   PetscInt           N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4654   PetscMPIInt        proc, k;
4655   PetscInt         **buf_ri, **buf_rj;
4656   PetscInt           anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4657   PetscInt           nrows, **buf_ri_k, **nextrow, **nextai;
4658   MPI_Request       *s_waits, *r_waits;
4659   MPI_Status        *status;
4660   const MatScalar   *aa, *a_a;
4661   MatScalar        **abuf_r, *ba_i;
4662   MatMergeSeqsToMPI *merge;
4663   PetscContainer     container;
4664 
4665   PetscFunctionBegin;
4666   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4667   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4668 
4669   PetscCallMPI(MPI_Comm_size(comm, &size));
4670   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4671 
4672   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4673   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4674   PetscCall(PetscContainerGetPointer(container, &merge));
4675   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4676   aa = a_a;
4677 
4678   bi     = merge->bi;
4679   bj     = merge->bj;
4680   buf_ri = merge->buf_ri;
4681   buf_rj = merge->buf_rj;
4682 
4683   PetscCall(PetscMalloc1(size, &status));
4684   owners = merge->rowmap->range;
4685   len_s  = merge->len_s;
4686 
4687   /* send and recv matrix values */
4688   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4689   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4690 
4691   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4692   for (proc = 0, k = 0; proc < size; proc++) {
4693     if (!len_s[proc]) continue;
4694     i = owners[proc];
4695     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4696     k++;
4697   }
4698 
4699   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4700   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4701   PetscCall(PetscFree(status));
4702 
4703   PetscCall(PetscFree(s_waits));
4704   PetscCall(PetscFree(r_waits));
4705 
4706   /* insert mat values of mpimat */
4707   PetscCall(PetscMalloc1(N, &ba_i));
4708   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4709 
4710   for (k = 0; k < merge->nrecv; k++) {
4711     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4712     nrows       = *buf_ri_k[k];
4713     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4714     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4715   }
4716 
4717   /* set values of ba */
4718   m = merge->rowmap->n;
4719   for (i = 0; i < m; i++) {
4720     arow = owners[rank] + i;
4721     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4722     bnzi = bi[i + 1] - bi[i];
4723     PetscCall(PetscArrayzero(ba_i, bnzi));
4724 
4725     /* add local non-zero vals of this proc's seqmat into ba */
4726     anzi   = ai[arow + 1] - ai[arow];
4727     aj     = a->j + ai[arow];
4728     aa     = a_a + ai[arow];
4729     nextaj = 0;
4730     for (j = 0; nextaj < anzi; j++) {
4731       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4732         ba_i[j] += aa[nextaj++];
4733       }
4734     }
4735 
4736     /* add received vals into ba */
4737     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4738       /* i-th row */
4739       if (i == *nextrow[k]) {
4740         anzi   = *(nextai[k] + 1) - *nextai[k];
4741         aj     = buf_rj[k] + *nextai[k];
4742         aa     = abuf_r[k] + *nextai[k];
4743         nextaj = 0;
4744         for (j = 0; nextaj < anzi; j++) {
4745           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4746             ba_i[j] += aa[nextaj++];
4747           }
4748         }
4749         nextrow[k]++;
4750         nextai[k]++;
4751       }
4752     }
4753     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4754   }
4755   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4756   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4757   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4758 
4759   PetscCall(PetscFree(abuf_r[0]));
4760   PetscCall(PetscFree(abuf_r));
4761   PetscCall(PetscFree(ba_i));
4762   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4763   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4764   PetscFunctionReturn(PETSC_SUCCESS);
4765 }
4766 
4767 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4768 {
4769   Mat                B_mpi;
4770   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)seqmat->data;
4771   PetscMPIInt        size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4772   PetscInt         **buf_rj, **buf_ri, **buf_ri_k;
4773   PetscInt           M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4774   PetscInt           len, *dnz, *onz, bs, cbs;
4775   PetscInt           k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4776   PetscInt           nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4777   MPI_Request       *si_waits, *sj_waits, *ri_waits, *rj_waits;
4778   MPI_Status        *status;
4779   PetscFreeSpaceList free_space = NULL, current_space = NULL;
4780   PetscBT            lnkbt;
4781   MatMergeSeqsToMPI *merge;
4782   PetscContainer     container;
4783 
4784   PetscFunctionBegin;
4785   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4786 
4787   /* make sure it is a PETSc comm */
4788   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4789   PetscCallMPI(MPI_Comm_size(comm, &size));
4790   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4791 
4792   PetscCall(PetscNew(&merge));
4793   PetscCall(PetscMalloc1(size, &status));
4794 
4795   /* determine row ownership */
4796   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4797   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4798   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4799   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4800   PetscCall(PetscLayoutSetUp(merge->rowmap));
4801   PetscCall(PetscMalloc1(size, &len_si));
4802   PetscCall(PetscMalloc1(size, &merge->len_s));
4803 
4804   m      = merge->rowmap->n;
4805   owners = merge->rowmap->range;
4806 
4807   /* determine the number of messages to send, their lengths */
4808   len_s = merge->len_s;
4809 
4810   len          = 0; /* length of buf_si[] */
4811   merge->nsend = 0;
4812   for (PetscMPIInt proc = 0; proc < size; proc++) {
4813     len_si[proc] = 0;
4814     if (proc == rank) {
4815       len_s[proc] = 0;
4816     } else {
4817       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4818       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4819     }
4820     if (len_s[proc]) {
4821       merge->nsend++;
4822       nrows = 0;
4823       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4824         if (ai[i + 1] > ai[i]) nrows++;
4825       }
4826       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4827       len += len_si[proc];
4828     }
4829   }
4830 
4831   /* determine the number and length of messages to receive for ij-structure */
4832   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4833   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4834 
4835   /* post the Irecv of j-structure */
4836   PetscCall(PetscCommGetNewTag(comm, &tagj));
4837   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4838 
4839   /* post the Isend of j-structure */
4840   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4841 
4842   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4843     if (!len_s[proc]) continue;
4844     i = owners[proc];
4845     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4846     k++;
4847   }
4848 
4849   /* receives and sends of j-structure are complete */
4850   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4851   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4852 
4853   /* send and recv i-structure */
4854   PetscCall(PetscCommGetNewTag(comm, &tagi));
4855   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4856 
4857   PetscCall(PetscMalloc1(len + 1, &buf_s));
4858   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4859   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4860     if (!len_s[proc]) continue;
4861     /* form outgoing message for i-structure:
4862          buf_si[0]:                 nrows to be sent
4863                [1:nrows]:           row index (global)
4864                [nrows+1:2*nrows+1]: i-structure index
4865     */
4866     nrows       = len_si[proc] / 2 - 1;
4867     buf_si_i    = buf_si + nrows + 1;
4868     buf_si[0]   = nrows;
4869     buf_si_i[0] = 0;
4870     nrows       = 0;
4871     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4872       anzi = ai[i + 1] - ai[i];
4873       if (anzi) {
4874         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4875         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4876         nrows++;
4877       }
4878     }
4879     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4880     k++;
4881     buf_si += len_si[proc];
4882   }
4883 
4884   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4885   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4886 
4887   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4888   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4889 
4890   PetscCall(PetscFree(len_si));
4891   PetscCall(PetscFree(len_ri));
4892   PetscCall(PetscFree(rj_waits));
4893   PetscCall(PetscFree2(si_waits, sj_waits));
4894   PetscCall(PetscFree(ri_waits));
4895   PetscCall(PetscFree(buf_s));
4896   PetscCall(PetscFree(status));
4897 
4898   /* compute a local seq matrix in each processor */
4899   /* allocate bi array and free space for accumulating nonzero column info */
4900   PetscCall(PetscMalloc1(m + 1, &bi));
4901   bi[0] = 0;
4902 
4903   /* create and initialize a linked list */
4904   nlnk = N + 1;
4905   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4906 
4907   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4908   len = ai[owners[rank + 1]] - ai[owners[rank]];
4909   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4910 
4911   current_space = free_space;
4912 
4913   /* determine symbolic info for each local row */
4914   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4915 
4916   for (k = 0; k < merge->nrecv; k++) {
4917     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4918     nrows       = *buf_ri_k[k];
4919     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4920     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4921   }
4922 
4923   MatPreallocateBegin(comm, m, n, dnz, onz);
4924   len = 0;
4925   for (i = 0; i < m; i++) {
4926     bnzi = 0;
4927     /* add local non-zero cols of this proc's seqmat into lnk */
4928     arow = owners[rank] + i;
4929     anzi = ai[arow + 1] - ai[arow];
4930     aj   = a->j + ai[arow];
4931     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4932     bnzi += nlnk;
4933     /* add received col data into lnk */
4934     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4935       if (i == *nextrow[k]) {            /* i-th row */
4936         anzi = *(nextai[k] + 1) - *nextai[k];
4937         aj   = buf_rj[k] + *nextai[k];
4938         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4939         bnzi += nlnk;
4940         nextrow[k]++;
4941         nextai[k]++;
4942       }
4943     }
4944     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4945 
4946     /* if free space is not available, make more free space */
4947     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4948     /* copy data into free space, then initialize lnk */
4949     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
4950     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4951 
4952     current_space->array += bnzi;
4953     current_space->local_used += bnzi;
4954     current_space->local_remaining -= bnzi;
4955 
4956     bi[i + 1] = bi[i] + bnzi;
4957   }
4958 
4959   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4960 
4961   PetscCall(PetscMalloc1(bi[m], &bj));
4962   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4963   PetscCall(PetscLLDestroy(lnk, lnkbt));
4964 
4965   /* create symbolic parallel matrix B_mpi */
4966   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
4967   PetscCall(MatCreate(comm, &B_mpi));
4968   if (n == PETSC_DECIDE) {
4969     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
4970   } else {
4971     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4972   }
4973   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
4974   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
4975   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
4976   MatPreallocateEnd(dnz, onz);
4977   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
4978 
4979   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4980   B_mpi->assembled = PETSC_FALSE;
4981   merge->bi        = bi;
4982   merge->bj        = bj;
4983   merge->buf_ri    = buf_ri;
4984   merge->buf_rj    = buf_rj;
4985   merge->coi       = NULL;
4986   merge->coj       = NULL;
4987   merge->owners_co = NULL;
4988 
4989   PetscCall(PetscCommDestroy(&comm));
4990 
4991   /* attach the supporting struct to B_mpi for reuse */
4992   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
4993   PetscCall(PetscContainerSetPointer(container, merge));
4994   PetscCall(PetscContainerSetCtxDestroy(container, MatMergeSeqsToMPIDestroy));
4995   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
4996   PetscCall(PetscContainerDestroy(&container));
4997   *mpimat = B_mpi;
4998 
4999   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5000   PetscFunctionReturn(PETSC_SUCCESS);
5001 }
5002 
5003 /*@
5004   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5005   matrices from each processor
5006 
5007   Collective
5008 
5009   Input Parameters:
5010 + comm   - the communicator the parallel matrix will live on
5011 . seqmat - the input sequential matrix
5012 . m      - number of local rows (or `PETSC_DECIDE`)
5013 . n      - number of local columns (or `PETSC_DECIDE`)
5014 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5015 
5016   Output Parameter:
5017 . mpimat - the parallel matrix generated
5018 
5019   Level: advanced
5020 
5021   Note:
5022   The dimensions of the sequential matrix in each processor MUST be the same.
5023   The input `seqmat` is kept in the container composed on `mpimat` under the name "MatMergeSeqsToMPI", and will be
5024   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5025 
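  Example Usage:
  A minimal sketch, not taken from the PETSc examples; `Aseq` is an illustrative `MATSEQAIJ` matrix of the same global size on every rank.
.vb
  Mat Aseq, Ampi;
  /* ... assemble Aseq with identical dimensions on each rank ... */
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Ampi));
  /* ... change the numerical values of Aseq, keeping its nonzero pattern ... */
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &Ampi));
  PetscCall(MatDestroy(&Ampi));
.ve
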
5026 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5027 @*/
5028 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5029 {
5030   PetscMPIInt size;
5031 
5032   PetscFunctionBegin;
5033   PetscCallMPI(MPI_Comm_size(comm, &size));
5034   if (size == 1) {
5035     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5036     if (scall == MAT_INITIAL_MATRIX) {
5037       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5038     } else {
5039       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5040     }
5041     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5042     PetscFunctionReturn(PETSC_SUCCESS);
5043   }
5044   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5045   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5046   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5047   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5048   PetscFunctionReturn(PETSC_SUCCESS);
5049 }
5050 
5051 /*@
5052   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5053 
5054   Not Collective
5055 
5056   Input Parameter:
5057 . A - the matrix
5058 
5059   Output Parameter:
5060 . A_loc - the local sequential matrix generated
5061 
5062   Level: developer
5063 
5064   Notes:
5065   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5066   with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5067   `n` is the global column count obtained with `MatGetSize()`.
5068 
5069   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5070 
5071   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5072 
5073   Destroy the matrix with `MatDestroy()`
5074 
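  Example Usage:
  A minimal sketch; `A` is an illustrative assembled `MATAIJ` matrix (sequential or parallel).
.vb
  Mat Aloc;
  PetscCall(MatAIJGetLocalMat(A, &Aloc));
  /* ... work with the local rows of A through Aloc ... */
  PetscCall(MatDestroy(&Aloc));
.ve
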
5075 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5076 @*/
5077 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5078 {
5079   PetscBool mpi;
5080 
5081   PetscFunctionBegin;
5082   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5083   if (mpi) {
5084     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5085   } else {
5086     *A_loc = A;
5087     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5088   }
5089   PetscFunctionReturn(PETSC_SUCCESS);
5090 }
5091 
5092 /*@
5093   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5094 
5095   Not Collective
5096 
5097   Input Parameters:
5098 + A     - the matrix
5099 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5100 
5101   Output Parameter:
5102 . A_loc - the local sequential matrix generated
5103 
5104   Level: developer
5105 
5106   Notes:
5107   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5108   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5109   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5110 
5111   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5112 
5113   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5114   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5115   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5116   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5117 
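  Example Usage:
  A minimal sketch; `A` is an illustrative assembled `MATMPIAIJ` matrix. The second call reuses the previously created local matrix and only refreshes its values.
.vb
  Mat Aloc;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
  /* ... the values of A change while its nonzero pattern stays fixed ... */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &Aloc));
  PetscCall(MatDestroy(&Aloc));
.ve
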
5118 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5119 @*/
5120 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5121 {
5122   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5123   Mat_SeqAIJ        *mat, *a, *b;
5124   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5125   const PetscScalar *aa, *ba, *aav, *bav;
5126   PetscScalar       *ca, *cam;
5127   PetscMPIInt        size;
5128   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5129   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5130   PetscBool          match;
5131 
5132   PetscFunctionBegin;
5133   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5134   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5135   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5136   if (size == 1) {
5137     if (scall == MAT_INITIAL_MATRIX) {
5138       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5139       *A_loc = mpimat->A;
5140     } else if (scall == MAT_REUSE_MATRIX) {
5141       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5142     }
5143     PetscFunctionReturn(PETSC_SUCCESS);
5144   }
5145 
5146   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5147   a  = (Mat_SeqAIJ *)mpimat->A->data;
5148   b  = (Mat_SeqAIJ *)mpimat->B->data;
5149   ai = a->i;
5150   aj = a->j;
5151   bi = b->i;
5152   bj = b->j;
5153   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5154   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5155   aa = aav;
5156   ba = bav;
5157   if (scall == MAT_INITIAL_MATRIX) {
5158     PetscCall(PetscMalloc1(1 + am, &ci));
5159     ci[0] = 0;
5160     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5161     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5162     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5163     k = 0;
5164     for (i = 0; i < am; i++) {
5165       ncols_o = bi[i + 1] - bi[i];
5166       ncols_d = ai[i + 1] - ai[i];
5167       /* off-diagonal portion of A */
5168       for (jo = 0; jo < ncols_o; jo++) {
5169         col = cmap[*bj];
5170         if (col >= cstart) break;
5171         cj[k] = col;
5172         bj++;
5173         ca[k++] = *ba++;
5174       }
5175       /* diagonal portion of A */
5176       for (j = 0; j < ncols_d; j++) {
5177         cj[k]   = cstart + *aj++;
5178         ca[k++] = *aa++;
5179       }
5180       /* off-diagonal portion of A */
5181       for (j = jo; j < ncols_o; j++) {
5182         cj[k]   = cmap[*bj++];
5183         ca[k++] = *ba++;
5184       }
5185     }
5186     /* put together the new matrix */
5187     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5188     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5189     /* Since these are PETSc arrays, change flags to free them as necessary. */
5190     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5191     mat->free_a  = PETSC_TRUE;
5192     mat->free_ij = PETSC_TRUE;
5193     mat->nonew   = 0;
5194   } else if (scall == MAT_REUSE_MATRIX) {
5195     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5196     ci  = mat->i;
5197     cj  = mat->j;
5198     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5199     for (i = 0; i < am; i++) {
5200       /* off-diagonal portion of A */
5201       ncols_o = bi[i + 1] - bi[i];
5202       for (jo = 0; jo < ncols_o; jo++) {
5203         col = cmap[*bj];
5204         if (col >= cstart) break;
5205         *cam++ = *ba++;
5206         bj++;
5207       }
5208       /* diagonal portion of A */
5209       ncols_d = ai[i + 1] - ai[i];
5210       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5211       /* off-diagonal portion of A */
5212       for (j = jo; j < ncols_o; j++) {
5213         *cam++ = *ba++;
5214         bj++;
5215       }
5216     }
5217     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5218   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5219   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5220   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5221   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5222   PetscFunctionReturn(PETSC_SUCCESS);
5223 }
5224 
5225 /*@
5226   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5227   mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5228 
5229   Not Collective
5230 
5231   Input Parameters:
5232 + A     - the matrix
5233 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5234 
5235   Output Parameters:
5236 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5237 - A_loc - the local sequential matrix generated
5238 
5239   Level: developer
5240 
5241   Note:
5242   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5243   part, followed by those associated with the off-diagonal part (in its local ordering).
5244 
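  Example Usage:
  A minimal sketch; `A` is an illustrative assembled `MATMPIAIJ` matrix. The returned `glob` maps the local column numbering of `Aloc` back to global columns of `A`.
.vb
  Mat             Aloc;
  IS              glob;
  const PetscInt *gidx;
  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &Aloc));
  PetscCall(ISGetIndices(glob, &gidx)); /* gidx[j] is the global column of local column j of Aloc */
  /* ... use Aloc and gidx ... */
  PetscCall(ISRestoreIndices(glob, &gidx));
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&Aloc));
.ve
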
5245 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5246 @*/
5247 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5248 {
5249   Mat             Ao, Ad;
5250   const PetscInt *cmap;
5251   PetscMPIInt     size;
5252   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5253 
5254   PetscFunctionBegin;
5255   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5256   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5257   if (size == 1) {
5258     if (scall == MAT_INITIAL_MATRIX) {
5259       PetscCall(PetscObjectReference((PetscObject)Ad));
5260       *A_loc = Ad;
5261     } else if (scall == MAT_REUSE_MATRIX) {
5262       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5263     }
5264     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5265     PetscFunctionReturn(PETSC_SUCCESS);
5266   }
5267   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5268   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5269   if (f) {
5270     PetscCall((*f)(A, scall, glob, A_loc));
5271   } else {
5272     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5273     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5274     Mat_SeqAIJ        *c;
5275     PetscInt          *ai = a->i, *aj = a->j;
5276     PetscInt          *bi = b->i, *bj = b->j;
5277     PetscInt          *ci, *cj;
5278     const PetscScalar *aa, *ba;
5279     PetscScalar       *ca;
5280     PetscInt           i, j, am, dn, on;
5281 
5282     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5283     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5284     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5285     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5286     if (scall == MAT_INITIAL_MATRIX) {
5287       PetscInt k;
5288       PetscCall(PetscMalloc1(1 + am, &ci));
5289       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5290       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5291       ci[0] = 0;
5292       for (i = 0, k = 0; i < am; i++) {
5293         const PetscInt ncols_o = bi[i + 1] - bi[i];
5294         const PetscInt ncols_d = ai[i + 1] - ai[i];
5295         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5296         /* diagonal portion of A */
5297         for (j = 0; j < ncols_d; j++, k++) {
5298           cj[k] = *aj++;
5299           ca[k] = *aa++;
5300         }
5301         /* off-diagonal portion of A */
5302         for (j = 0; j < ncols_o; j++, k++) {
5303           cj[k] = dn + *bj++;
5304           ca[k] = *ba++;
5305         }
5306       }
5307       /* put together the new matrix */
5308       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5309       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5310       /* Since these are PETSc arrays, change flags to free them as necessary. */
5311       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5312       c->free_a  = PETSC_TRUE;
5313       c->free_ij = PETSC_TRUE;
5314       c->nonew   = 0;
5315       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5316     } else if (scall == MAT_REUSE_MATRIX) {
5317       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5318       for (i = 0; i < am; i++) {
5319         const PetscInt ncols_d = ai[i + 1] - ai[i];
5320         const PetscInt ncols_o = bi[i + 1] - bi[i];
5321         /* diagonal portion of A */
5322         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5323         /* off-diagonal portion of A */
5324         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5325       }
5326       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5327     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5328     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5329     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5330     if (glob) {
5331       PetscInt cst, *gidx;
5332 
5333       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5334       PetscCall(PetscMalloc1(dn + on, &gidx));
5335       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5336       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5337       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5338     }
5339   }
5340   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5341   PetscFunctionReturn(PETSC_SUCCESS);
5342 }
5343 
5344 /*@C
5345   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5346 
5347   Not Collective
5348 
5349   Input Parameters:
5350 + A     - the matrix
5351 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5352 . row   - index set of rows to extract (or `NULL`)
5353 - col   - index set of columns to extract (or `NULL`)
5354 
5355   Output Parameter:
5356 . A_loc - the local sequential matrix generated
5357 
5358   Level: developer
5359 
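  Example Usage:
  A minimal sketch; `A` is an illustrative assembled `MATMPIAIJ` matrix. Passing `NULL` for `row` and `col` keeps all local rows and only the nonzero columns.
.vb
  Mat Aloc;
  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &Aloc));
  /* ... use Aloc ... */
  PetscCall(MatDestroy(&Aloc));
.ve
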
5360 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5361 @*/
5362 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5363 {
5364   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5365   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5366   IS          isrowa, iscola;
5367   Mat        *aloc;
5368   PetscBool   match;
5369 
5370   PetscFunctionBegin;
5371   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5372   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5373   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5374   if (!row) {
5375     start = A->rmap->rstart;
5376     end   = A->rmap->rend;
5377     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5378   } else {
5379     isrowa = *row;
5380   }
5381   if (!col) {
5382     start = A->cmap->rstart;
5383     cmap  = a->garray;
5384     nzA   = a->A->cmap->n;
5385     nzB   = a->B->cmap->n;
5386     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5387     ncols = 0;
5388     for (i = 0; i < nzB; i++) {
5389       if (cmap[i] < start) idx[ncols++] = cmap[i];
5390       else break;
5391     }
5392     imark = i;
5393     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5394     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5395     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5396   } else {
5397     iscola = *col;
5398   }
5399   if (scall != MAT_INITIAL_MATRIX) {
5400     PetscCall(PetscMalloc1(1, &aloc));
5401     aloc[0] = *A_loc;
5402   }
5403   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5404   if (!col) { /* attach global id of condensed columns */
5405     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5406   }
5407   *A_loc = aloc[0];
5408   PetscCall(PetscFree(aloc));
5409   if (!row) PetscCall(ISDestroy(&isrowa));
5410   if (!col) PetscCall(ISDestroy(&iscola));
5411   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5412   PetscFunctionReturn(PETSC_SUCCESS);
5413 }
5414 
5415 /*
5416  * Create a sequential AIJ matrix based on row indices; a whole row is extracted once its index is matched.
5417  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is allocated
5418  * proportionally to a global size.
5419  * */
5420 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5421 {
5422   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5423   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5424   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5425   PetscMPIInt            owner;
5426   PetscSFNode           *iremote, *oiremote;
5427   const PetscInt        *lrowindices;
5428   PetscSF                sf, osf;
5429   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5430   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5431   MPI_Comm               comm;
5432   ISLocalToGlobalMapping mapping;
5433   const PetscScalar     *pd_a, *po_a;
5434 
5435   PetscFunctionBegin;
5436   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5437   /* plocalsize is the number of roots
5438    * nrows is the number of leaves
5439    * */
5440   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5441   PetscCall(ISGetLocalSize(rows, &nrows));
5442   PetscCall(PetscCalloc1(nrows, &iremote));
5443   PetscCall(ISGetIndices(rows, &lrowindices));
5444   for (i = 0; i < nrows; i++) {
5445     /* Find a remote index and an owner for a row
5446      * The row could be local or remote
5447      * */
5448     owner = 0;
5449     lidx  = 0;
5450     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5451     iremote[i].index = lidx;
5452     iremote[i].rank  = owner;
5453   }
5454   /* Create SF to communicate how many nonzero columns for each row */
5455   PetscCall(PetscSFCreate(comm, &sf));
5456   /* SF will figure out the number of nonzero columns for each row, and their
5457    * offsets
5458    * */
5459   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5460   PetscCall(PetscSFSetFromOptions(sf));
5461   PetscCall(PetscSFSetUp(sf));
5462 
5463   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5464   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5465   PetscCall(PetscCalloc1(nrows, &pnnz));
5466   roffsets[0] = 0;
5467   roffsets[1] = 0;
5468   for (i = 0; i < plocalsize; i++) {
5469     /* diagonal */
5470     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5471     /* off-diagonal */
5472     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5473     /* compute offsets so that we know the relative location of each row */
5474     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5475     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5476   }
5477   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5478   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5479   /* 'r' means root, and 'l' means leaf */
5480   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5481   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5482   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5483   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5484   PetscCall(PetscSFDestroy(&sf));
5485   PetscCall(PetscFree(roffsets));
5486   PetscCall(PetscFree(nrcols));
5487   dntotalcols = 0;
5488   ontotalcols = 0;
5489   ncol        = 0;
5490   for (i = 0; i < nrows; i++) {
5491     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5492     ncol    = PetscMax(pnnz[i], ncol);
5493     /* diagonal */
5494     dntotalcols += nlcols[i * 2 + 0];
5495     /* off-diagonal */
5496     ontotalcols += nlcols[i * 2 + 1];
5497   }
5498   /* We do not need to figure out the exact number of columns
5499    * since all the calculations will be done by going through the raw data
5500    * */
5501   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5502   PetscCall(MatSetUp(*P_oth));
5503   PetscCall(PetscFree(pnnz));
5504   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5505   /* diagonal */
5506   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5507   /* off-diagonal */
5508   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5509   /* diagonal */
5510   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5511   /* off-diagonal */
5512   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5513   dntotalcols = 0;
5514   ontotalcols = 0;
5515   ntotalcols  = 0;
5516   for (i = 0; i < nrows; i++) {
5517     owner = 0;
5518     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5519     /* Set iremote for diag matrix */
5520     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5521       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5522       iremote[dntotalcols].rank  = owner;
5523       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5524       ilocal[dntotalcols++] = ntotalcols++;
5525     }
5526     /* off-diagonal */
5527     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5528       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5529       oiremote[ontotalcols].rank  = owner;
5530       oilocal[ontotalcols++]      = ntotalcols++;
5531     }
5532   }
5533   PetscCall(ISRestoreIndices(rows, &lrowindices));
5534   PetscCall(PetscFree(loffsets));
5535   PetscCall(PetscFree(nlcols));
5536   PetscCall(PetscSFCreate(comm, &sf));
5537   /* P serves as the roots and P_oth as the leaves
5538    * Diagonal matrix
5539    * */
5540   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5541   PetscCall(PetscSFSetFromOptions(sf));
5542   PetscCall(PetscSFSetUp(sf));
5543 
5544   PetscCall(PetscSFCreate(comm, &osf));
5545   /* off-diagonal */
5546   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5547   PetscCall(PetscSFSetFromOptions(osf));
5548   PetscCall(PetscSFSetUp(osf));
5549   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5550   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5551   /* operate on the matrix internal data to save memory */
5552   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5553   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5554   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5555   /* Convert to global indices for diag matrix */
5556   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5557   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5558   /* We want P_oth to store global indices */
5559   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5560   /* Use memory scalable approach */
5561   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5562   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5563   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5564   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5565   /* Convert back to local indices */
5566   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5567   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5568   nout = 0;
5569   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5570   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5571   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5572   /* Exchange values */
5573   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5574   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5575   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5576   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5577   /* Stop PETSc from shrinking memory */
5578   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5579   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5580   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5581   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5582   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5583   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5584   PetscCall(PetscSFDestroy(&sf));
5585   PetscCall(PetscSFDestroy(&osf));
5586   PetscFunctionReturn(PETSC_SUCCESS);
5587 }
5588 
5589 /*
5590  * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of the local part of A.
5591  * This supports MPIAIJ and MAIJ.
5592  * */
5593 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5594 {
5595   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5596   Mat_SeqAIJ *p_oth;
5597   IS          rows, map;
5598   PetscHMapI  hamp;
5599   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5600   MPI_Comm    comm;
5601   PetscSF     sf, osf;
5602   PetscBool   has;
5603 
5604   PetscFunctionBegin;
5605   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5606   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5607   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5608    *  and then create a submatrix (that often is an overlapping matrix)
5609    * */
5610   if (reuse == MAT_INITIAL_MATRIX) {
5611     /* Use a hash table to figure out unique keys */
5612     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5613     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5614     count = 0;
5615     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5616     for (i = 0; i < a->B->cmap->n; i++) {
5617       key = a->garray[i] / dof;
5618       PetscCall(PetscHMapIHas(hamp, key, &has));
5619       if (!has) {
5620         mapping[i] = count;
5621         PetscCall(PetscHMapISet(hamp, key, count++));
5622       } else {
5623         /* Current 'i' has the same key as the previous step */
5624         mapping[i] = count - 1;
5625       }
5626     }
5627     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5628     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5629     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5630     PetscCall(PetscCalloc1(htsize, &rowindices));
5631     off = 0;
5632     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5633     PetscCall(PetscHMapIDestroy(&hamp));
5634     PetscCall(PetscSortInt(htsize, rowindices));
5635     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5636     /* In case the matrix was already created but the user wants to recreate it */
5637     PetscCall(MatDestroy(P_oth));
5638     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5639     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5640     PetscCall(ISDestroy(&map));
5641     PetscCall(ISDestroy(&rows));
5642   } else if (reuse == MAT_REUSE_MATRIX) {
5643     /* If the matrix was already created, we simply update values using the SF objects
5644      * that were attached to the matrix earlier.
5645      */
5646     const PetscScalar *pd_a, *po_a;
5647 
5648     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5649     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5650     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5651     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5652     /* Update values in place */
5653     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5654     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5655     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5656     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5657     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5658     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5659     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5660     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5661   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5662   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5663   PetscFunctionReturn(PETSC_SUCCESS);
5664 }
5665 
5666 /*@C
5667   MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to nonzero columns of the local part of `A`
5668 
5669   Collective
5670 
5671   Input Parameters:
5672 + A     - the first matrix in `MATMPIAIJ` format
5673 . B     - the second matrix in `MATMPIAIJ` format
5674 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5675 
5676   Output Parameters:
5677 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5678 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5679 - B_seq - the sequential matrix generated
5680 
5681   Level: developer
5682 
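  Example Usage:
  A minimal sketch; `A` and `B` are illustrative `MATMPIAIJ` matrices with compatible layouts. The index sets created by the first call are reused to refresh `Bseq` when only the values of `B` change.
.vb
  IS  rowb = NULL, colb = NULL;
  Mat Bseq;
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &Bseq));
  /* ... the values of B change while its nonzero pattern stays fixed ... */
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &Bseq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&Bseq));
.ve
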
5683 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5684 @*/
5685 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5686 {
5687   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5688   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5689   IS          isrowb, iscolb;
5690   Mat        *bseq = NULL;
5691 
5692   PetscFunctionBegin;
5693   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5694              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5695   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5696 
5697   if (scall == MAT_INITIAL_MATRIX) {
5698     start = A->cmap->rstart;
5699     cmap  = a->garray;
5700     nzA   = a->A->cmap->n;
5701     nzB   = a->B->cmap->n;
5702     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5703     ncols = 0;
5704     for (i = 0; i < nzB; i++) { /* row < local row index */
5705       if (cmap[i] < start) idx[ncols++] = cmap[i];
5706       else break;
5707     }
5708     imark = i;
5709     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5710     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5711     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5712     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5713   } else {
5714     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5715     isrowb = *rowb;
5716     iscolb = *colb;
5717     PetscCall(PetscMalloc1(1, &bseq));
5718     bseq[0] = *B_seq;
5719   }
5720   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5721   *B_seq = bseq[0];
5722   PetscCall(PetscFree(bseq));
5723   if (!rowb) {
5724     PetscCall(ISDestroy(&isrowb));
5725   } else {
5726     *rowb = isrowb;
5727   }
5728   if (!colb) {
5729     PetscCall(ISDestroy(&iscolb));
5730   } else {
5731     *colb = iscolb;
5732   }
5733   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5734   PetscFunctionReturn(PETSC_SUCCESS);
5735 }
5736 
5737 /*
5738     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to nonzero columns
5739     of the OFF-DIAGONAL portion of local A
5740 
5741     Collective
5742 
5743    Input Parameters:
5744 +    A,B - the matrices in `MATMPIAIJ` format
5745 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5746 
5747    Output Parameters:
5748 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5749 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5750 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5751 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5752 
5753     Developer Note:
5754     This directly accesses information inside the VecScatter associated with the matrix-vector product
5755      for this matrix. This is not desirable..
5756 
5757     Level: developer
5758 
5759 */
5760 
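/*
   A minimal calling sketch (internal use; the variable names are illustrative):

     PetscInt  *startsj_s = NULL, *startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth;
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
     ... the values of B change while its nonzero pattern stays fixed ...
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));

   The caller keeps startsj_s, startsj_r and bufa for later reuse and frees them, together with B_oth, when done.
*/
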
5761 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5762 {
5763   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5764   VecScatter         ctx;
5765   MPI_Comm           comm;
5766   const PetscMPIInt *rprocs, *sprocs;
5767   PetscMPIInt        nrecvs, nsends;
5768   const PetscInt    *srow, *rstarts, *sstarts;
5769   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5770   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5771   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5772   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5773   PetscMPIInt        size, tag, rank, nreqs;
5774 
5775   PetscFunctionBegin;
5776   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5777   PetscCallMPI(MPI_Comm_size(comm, &size));
5778 
5779   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5780              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5781   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5782   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5783 
5784   if (size == 1) {
5785     startsj_s = NULL;
5786     bufa_ptr  = NULL;
5787     *B_oth    = NULL;
5788     PetscFunctionReturn(PETSC_SUCCESS);
5789   }
5790 
5791   ctx = a->Mvctx;
5792   tag = ((PetscObject)ctx)->tag;
5793 
5794   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5795   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5796   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5797   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5798   PetscCall(PetscMalloc1(nreqs, &reqs));
5799   rwaits = reqs;
5800   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5801 
5802   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5803   if (scall == MAT_INITIAL_MATRIX) {
5804     /* i-array */
5805     /*  post receives */
5806     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5807     for (i = 0; i < nrecvs; i++) {
5808       rowlen = rvalues + rstarts[i] * rbs;
5809       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5810       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5811     }
5812 
5813     /* pack the outgoing message */
5814     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5815 
5816     sstartsj[0] = 0;
5817     rstartsj[0] = 0;
5818     len         = 0; /* total length of j or a array to be sent */
5819     if (nsends) {
5820       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5821       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5822     }
5823     for (i = 0; i < nsends; i++) {
5824       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5825       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5826       for (j = 0; j < nrows; j++) {
5827         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5828         for (l = 0; l < sbs; l++) {
5829           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5830 
5831           rowlen[j * sbs + l] = ncols;
5832 
5833           len += ncols;
5834           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5835         }
5836         k++;
5837       }
5838       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5839 
5840       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5841     }
5842     /* recvs and sends of i-array are completed */
5843     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5844     PetscCall(PetscFree(svalues));
5845 
5846     /* allocate buffers for sending j and a arrays */
5847     PetscCall(PetscMalloc1(len, &bufj));
5848     PetscCall(PetscMalloc1(len, &bufa));
5849 
5850     /* create i-array of B_oth */
5851     PetscCall(PetscMalloc1(aBn + 1, &b_othi));
5852 
5853     b_othi[0] = 0;
5854     len       = 0; /* total length of j or a array to be received */
5855     k         = 0;
5856     for (i = 0; i < nrecvs; i++) {
5857       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5858       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5859       for (j = 0; j < nrows; j++) {
5860         b_othi[k + 1] = b_othi[k] + rowlen[j];
5861         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5862         k++;
5863       }
5864       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5865     }
5866     PetscCall(PetscFree(rvalues));
5867 
5868     /* allocate space for j and a arrays of B_oth */
5869     PetscCall(PetscMalloc1(b_othi[aBn], &b_othj));
5870     PetscCall(PetscMalloc1(b_othi[aBn], &b_otha));
5871 
5872     /* j-array */
5873     /*  post receives of j-array */
5874     for (i = 0; i < nrecvs; i++) {
5875       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5876       PetscCallMPI(MPIU_Irecv(PetscSafePointerPlusOffset(b_othj, rstartsj[i]), nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5877     }
5878 
5879     /* pack the outgoing message j-array */
5880     if (nsends) k = sstarts[0];
5881     for (i = 0; i < nsends; i++) {
5882       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5883       bufJ  = PetscSafePointerPlusOffset(bufj, sstartsj[i]);
5884       for (j = 0; j < nrows; j++) {
5885         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5886         for (ll = 0; ll < sbs; ll++) {
5887           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5888           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5889           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5890         }
5891       }
5892       PetscCallMPI(MPIU_Isend(PetscSafePointerPlusOffset(bufj, sstartsj[i]), sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5893     }
5894 
5895     /* recvs and sends of j-array are completed */
5896     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5897   } else if (scall == MAT_REUSE_MATRIX) {
5898     sstartsj = *startsj_s;
5899     rstartsj = *startsj_r;
5900     bufa     = *bufa_ptr;
5901     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5902   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value");
5903 
5904   /* a-array */
5905   /*  post receives of a-array */
5906   for (i = 0; i < nrecvs; i++) {
5907     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5908     PetscCallMPI(MPIU_Irecv(PetscSafePointerPlusOffset(b_otha, rstartsj[i]), nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5909   }
5910 
5911   /* pack the outgoing message a-array */
5912   if (nsends) k = sstarts[0];
5913   for (i = 0; i < nsends; i++) {
5914     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5915     bufA  = PetscSafePointerPlusOffset(bufa, sstartsj[i]);
5916     for (j = 0; j < nrows; j++) {
5917       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5918       for (ll = 0; ll < sbs; ll++) {
5919         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5920         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5921         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5922       }
5923     }
5924     PetscCallMPI(MPIU_Isend(PetscSafePointerPlusOffset(bufa, sstartsj[i]), sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5925   }
5926   /* recvs and sends of a-array are completed */
5927   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5928   PetscCall(PetscFree(reqs));
5929 
5930   if (scall == MAT_INITIAL_MATRIX) {
5931     Mat_SeqAIJ *b_oth;
5932 
5933     /* put together the new matrix */
5934     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5935 
5936     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5937     /* Since these are PETSc arrays, change flags to free them as necessary. */
5938     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5939     b_oth->free_a  = PETSC_TRUE;
5940     b_oth->free_ij = PETSC_TRUE;
5941     b_oth->nonew   = 0;
5942 
5943     PetscCall(PetscFree(bufj));
5944     if (!startsj_s || !bufa_ptr) {
5945       PetscCall(PetscFree2(sstartsj, rstartsj));
5946       PetscCall(PetscFree(bufa_ptr));
5947     } else {
5948       *startsj_s = sstartsj;
5949       *startsj_r = rstartsj;
5950       *bufa_ptr  = bufa;
5951     }
5952   } else if (scall == MAT_REUSE_MATRIX) {
5953     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5954   }
5955 
5956   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5957   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5958   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5959   PetscFunctionReturn(PETSC_SUCCESS);
5960 }
5961 
5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5963 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5965 #if defined(PETSC_HAVE_MKL_SPARSE)
5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5967 #endif
5968 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5970 #if defined(PETSC_HAVE_ELEMENTAL)
5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5972 #endif
5973 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
5974 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5975 #endif
5976 #if defined(PETSC_HAVE_HYPRE)
5977 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
5978 #endif
5979 #if defined(PETSC_HAVE_CUDA)
5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
5981 #endif
5982 #if defined(PETSC_HAVE_HIP)
5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
5984 #endif
5985 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
5987 #endif
5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
5989 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
5990 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5991 
5992 /*
5993     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5994 
5995                n                       p                          p
5996         [             ]       [             ]         [                 ]
5997       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5998         [             ]       [             ]         [                 ]
5999 
6000 */
6001 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6002 {
6003   Mat At, Bt, Ct;
6004 
6005   PetscFunctionBegin;
6006   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6007   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6008   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6009   PetscCall(MatDestroy(&At));
6010   PetscCall(MatDestroy(&Bt));
6011   PetscCall(MatTransposeSetPrecursor(Ct, C));
6012   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6013   PetscCall(MatDestroy(&Ct));
6014   PetscFunctionReturn(PETSC_SUCCESS);
6015 }
6016 
6017 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6018 {
6019   PetscBool cisdense;
6020 
6021   PetscFunctionBegin;
6022   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6023   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6024   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6025   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6026   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6027   PetscCall(MatSetUp(C));
6028 
6029   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6030   PetscFunctionReturn(PETSC_SUCCESS);
6031 }
6032 
6033 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6034 {
6035   Mat_Product *product = C->product;
6036   Mat          A = product->A, B = product->B;
6037 
6038   PetscFunctionBegin;
6039   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6040              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6041   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6042   C->ops->productsymbolic = MatProductSymbolic_AB;
6043   PetscFunctionReturn(PETSC_SUCCESS);
6044 }
6045 
6046 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6047 {
6048   Mat_Product *product = C->product;
6049 
6050   PetscFunctionBegin;
6051   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6052   PetscFunctionReturn(PETSC_SUCCESS);
6053 }
6054 
6055 /*
6056    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6057 
6058   Input Parameters:
6059 
6060     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6061     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6062 
6063     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6064 
6065     For Set1, j1[] contains column indices of the nonzeros.
6066     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6067     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6068     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6069 
6070     Similar for Set2.
6071 
6072     This routine merges the two sets of nonzeros row by row and removes repeats.
6073 
6074   Output Parameters: (memory is allocated by the caller)
6075 
6076     i[],j[]: the CSR of the merged matrix, which has m rows.
6077     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6078     imap2[]: similar to imap1[], but for Set2.
6079     Note we order nonzeros row-by-row and from left to right.
6080 */
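/*
  Example (an illustrative sketch added for clarity; the values below are hypothetical):
    m = 1,  Set1: j1 = [2,2,5],    rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]
            Set2: j2 = [3,5,5,7],  rowBegin2 = [0], rowEnd2 = [4], jmap2 = [0,1,3,4]
  produce the merged CSR
    i = [0,4],  j = [2,3,5,7]
  with
    imap1 = [0,2]    (Set1's unique nonzeros, columns 2 and 5, are the 0th and 2nd merged nonzeros)
    imap2 = [1,2,3]  (Set2's unique nonzeros, columns 3, 5 and 7, are the 1st, 2nd and 3rd merged nonzeros)
*/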
6081 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6082 {
6083   PetscInt   r, m; /* Row index of mat */
6084   PetscCount t, t1, t2, b1, e1, b2, e2;
6085 
6086   PetscFunctionBegin;
6087   PetscCall(MatGetLocalSize(mat, &m, NULL));
6088   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6089   i[0]        = 0;
6090   for (r = 0; r < m; r++) { /* Do row by row merging */
6091     b1 = rowBegin1[r];
6092     e1 = rowEnd1[r];
6093     b2 = rowBegin2[r];
6094     e2 = rowEnd2[r];
6095     while (b1 < e1 && b2 < e2) {
6096       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6097         j[t]      = j1[b1];
6098         imap1[t1] = t;
6099         imap2[t2] = t;
6100         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6101         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6102         t1++;
6103         t2++;
6104         t++;
6105       } else if (j1[b1] < j2[b2]) {
6106         j[t]      = j1[b1];
6107         imap1[t1] = t;
6108         b1 += jmap1[t1 + 1] - jmap1[t1];
6109         t1++;
6110         t++;
6111       } else {
6112         j[t]      = j2[b2];
6113         imap2[t2] = t;
6114         b2 += jmap2[t2 + 1] - jmap2[t2];
6115         t2++;
6116         t++;
6117       }
6118     }
6119     /* Merge the remaining in either j1[] or j2[] */
6120     while (b1 < e1) {
6121       j[t]      = j1[b1];
6122       imap1[t1] = t;
6123       b1 += jmap1[t1 + 1] - jmap1[t1];
6124       t1++;
6125       t++;
6126     }
6127     while (b2 < e2) {
6128       j[t]      = j2[b2];
6129       imap2[t2] = t;
6130       b2 += jmap2[t2 + 1] - jmap2[t2];
6131       t2++;
6132       t++;
6133     }
6134     PetscCall(PetscIntCast(t, i + r + 1));
6135   }
6136   PetscFunctionReturn(PETSC_SUCCESS);
6137 }
6138 
6139 /*
6140   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6141 
6142   Input Parameters:
6143     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6144     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6145       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6146 
6147       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6148       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6149 
6150   Output Parameters:
6151     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6152     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6153       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6154       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6155 
6156     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6157       Atot: number of entries belonging to the diagonal block.
6158       Annz: number of unique nonzeros belonging to the diagonal block.
6159       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6160         repeats (i.e., same 'i,j' pair).
6161       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6162         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6166 
6167     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6168 
6169     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6170 */
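/*
  Example (an illustrative sketch with hypothetical values): suppose this rank's diagonal block owns
  columns [cstart,cend) = [4,8) and one local row r has entries with j[] = [9,5,5,1,4].
  After the shift-and-sort below the row reads [4,5,5 | 1,9], giving
    rowBegin[r] = 0, rowMid[r] = 3, rowEnd[r] = 5
    Atot += 3, Annz += 2, and Ajmap gains the repeat counts [1,2] (columns 4 and 5)
    Btot += 2, Bnnz += 2, and Bjmap gains the repeat counts [1,1] (columns 1 and 9)
  while Aperm/Bperm receive the perm[] values of the diag/offdiag entries in this sorted order.
*/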
6171 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6172 {
6173   PetscInt    cstart, cend, rstart, rend, row, col;
6174   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6175   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6176   PetscCount  k, m, p, q, r, s, mid;
6177   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6178 
6179   PetscFunctionBegin;
6180   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6181   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6182   m = rend - rstart;
6183 
6184   /* Skip negative rows */
6185   for (k = 0; k < n; k++)
6186     if (i[k] >= 0) break;
6187 
6188   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6189      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6190   */
6191   while (k < n) {
6192     row = i[k];
6193     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6194     for (s = k; s < n; s++)
6195       if (i[s] != row) break;
6196 
6197     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6198     for (p = k; p < s; p++) {
6199       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6200     }
6201     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6202     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6203     rowBegin[row - rstart] = k;
6204     rowMid[row - rstart]   = mid;
6205     rowEnd[row - rstart]   = s;
6206     PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N);
6207 
6208     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6209     Atot += mid - k;
6210     Btot += s - mid;
6211 
6212     /* Count unique nonzeros of this diag row */
6213     for (p = k; p < mid;) {
6214       col = j[p];
6215       do {
6216         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6217         p++;
6218       } while (p < mid && j[p] == col);
6219       Annz++;
6220     }
6221 
6222     /* Count unique nonzeros of this offdiag row */
6223     for (p = mid; p < s;) {
6224       col = j[p];
6225       do {
6226         p++;
6227       } while (p < s && j[p] == col);
6228       Bnnz++;
6229     }
6230     k = s;
6231   }
6232 
6233   /* Allocation according to Atot, Btot, Annz, Bnnz */
6234   PetscCall(PetscMalloc1(Atot, &Aperm));
6235   PetscCall(PetscMalloc1(Btot, &Bperm));
6236   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6237   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6238 
6239   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6240   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6241   for (r = 0; r < m; r++) {
6242     k   = rowBegin[r];
6243     mid = rowMid[r];
6244     s   = rowEnd[r];
6245     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6246     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6247     Atot += mid - k;
6248     Btot += s - mid;
6249 
6250     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6251     for (p = k; p < mid;) {
6252       col = j[p];
6253       q   = p;
6254       do {
6255         p++;
6256       } while (p < mid && j[p] == col);
6257       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6258       Annz++;
6259     }
6260 
6261     for (p = mid; p < s;) {
6262       col = j[p];
6263       q   = p;
6264       do {
6265         p++;
6266       } while (p < s && j[p] == col);
6267       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6268       Bnnz++;
6269     }
6270   }
6271   /* Output */
6272   *Aperm_ = Aperm;
6273   *Annz_  = Annz;
6274   *Atot_  = Atot;
6275   *Ajmap_ = Ajmap;
6276   *Bperm_ = Bperm;
6277   *Bnnz_  = Bnnz;
6278   *Btot_  = Btot;
6279   *Bjmap_ = Bjmap;
6280   PetscFunctionReturn(PETSC_SUCCESS);
6281 }
6282 
6283 /*
6284   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6285 
6286   Input Parameters:
6287     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6288     nnz:  number of unique nonzeros in the merged matrix
6289     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6290     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6291 
6292   Output Parameter: (memory is allocated by the caller)
6293     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6294 
6295   Example:
6296     nnz1 = 4
6297     nnz  = 6
6298     imap = [1,3,4,5]
6299     jmap = [0,3,5,6,7]
6300    then,
6301     jmap_new = [0,0,3,3,5,6,7]
6302 */
6303 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6304 {
6305   PetscCount k, p;
6306 
6307   PetscFunctionBegin;
6308   jmap_new[0] = 0;
6309   p           = nnz;                /* p loops over jmap_new[] backwards */
6310   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6311     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6312   }
6313   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6314   PetscFunctionReturn(PETSC_SUCCESS);
6315 }
6316 
6317 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(PetscCtxRt data)
6318 {
6319   MatCOOStruct_MPIAIJ *coo = *(MatCOOStruct_MPIAIJ **)data;
6320 
6321   PetscFunctionBegin;
6322   PetscCall(PetscSFDestroy(&coo->sf));
6323   PetscCall(PetscFree(coo->Aperm1));
6324   PetscCall(PetscFree(coo->Bperm1));
6325   PetscCall(PetscFree(coo->Ajmap1));
6326   PetscCall(PetscFree(coo->Bjmap1));
6327   PetscCall(PetscFree(coo->Aimap2));
6328   PetscCall(PetscFree(coo->Bimap2));
6329   PetscCall(PetscFree(coo->Aperm2));
6330   PetscCall(PetscFree(coo->Bperm2));
6331   PetscCall(PetscFree(coo->Ajmap2));
6332   PetscCall(PetscFree(coo->Bjmap2));
6333   PetscCall(PetscFree(coo->Cperm1));
6334   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6335   PetscCall(PetscFree(coo));
6336   PetscFunctionReturn(PETSC_SUCCESS);
6337 }
6338 
6339 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6340 {
6341   MPI_Comm             comm;
6342   PetscMPIInt          rank, size;
6343   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6344   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6345   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6346   PetscContainer       container;
6347   MatCOOStruct_MPIAIJ *coo;
6348 
6349   PetscFunctionBegin;
6350   PetscCall(PetscFree(mpiaij->garray));
6351   PetscCall(VecDestroy(&mpiaij->lvec));
6352 #if defined(PETSC_USE_CTABLE)
6353   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6354 #else
6355   PetscCall(PetscFree(mpiaij->colmap));
6356 #endif
6357   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6358   mat->assembled     = PETSC_FALSE;
6359   mat->was_assembled = PETSC_FALSE;
6360 
6361   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6362   PetscCallMPI(MPI_Comm_size(comm, &size));
6363   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6364   PetscCall(PetscLayoutSetUp(mat->rmap));
6365   PetscCall(PetscLayoutSetUp(mat->cmap));
6366   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6367   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6368   PetscCall(MatGetLocalSize(mat, &m, &n));
6369   PetscCall(MatGetSize(mat, &M, &N));
6370 
6371   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6372   /* entries come first, then local rows, then remote rows.                     */
6373   PetscCount n1 = coo_n, *perm1;
6374   PetscInt  *i1 = coo_i, *j1 = coo_j;
6375 
6376   PetscCall(PetscMalloc1(n1, &perm1));
6377   for (k = 0; k < n1; k++) perm1[k] = k;
6378 
6379   /* Manipulate indices so that entries with negative row or col indices will have smallest
6380      row indices, local entries will have greater but negative row indices, and remote entries
6381      will have positive row indices.
6382   */
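  /* For example (hypothetical values), on a rank owning rows [rstart,rend) = [4,8):
       (i,j) = (-1,3) -> i = PETSC_INT_MIN            (negative index: entry to be ignored, sorts first)
       (i,j) = (5,0)  -> i = 5 - PETSC_INT_MAX < 0    (local row, sorts after the ignored entries)
       (i,j) = (10,2) -> i = 10                       (remote row, keeps a positive index and sorts last)
  */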
6383   for (k = 0; k < n1; k++) {
6384     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6385     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6386     else {
6387       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but entries are being inserted into remote rows");
6388       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6389     }
6390   }
6391 
6392   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6393   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6394 
6395   /* Advance k to the first entry we need to take care of */
6396   for (k = 0; k < n1; k++)
6397     if (i1[k] > PETSC_INT_MIN) break;
6398   PetscCount i1start = k;
6399 
6400   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6401   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */
6402 
6403   PetscCheck(n1 == 0 || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M);
6404 
6405   /*           Send remote rows to their owner                                  */
6406   /* Find which rows should be sent to which remote ranks */
6407   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6408   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6409   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6410   const PetscInt *ranges;
6411   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6412 
6413   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6414   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6415   for (k = rem; k < n1;) {
6416     PetscMPIInt owner;
6417     PetscInt    firstRow, lastRow;
6418 
6419     /* Locate a row range */
6420     firstRow = i1[k]; /* first row of this owner */
6421     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6422     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6423 
6424     /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */
6425     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6426 
6427     /* All entries in [k,p) belong to this remote owner */
6428     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6429       PetscMPIInt *sendto2;
6430       PetscInt    *nentries2;
6431       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6432 
6433       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6434       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6435       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6436       PetscCall(PetscFree2(sendto, nentries));
6437       sendto   = sendto2;
6438       nentries = nentries2;
6439       maxNsend = maxNsend2;
6440     }
6441     sendto[nsend] = owner;
6442     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6443     nsend++;
6444     k = p;
6445   }
6446 
6447   /* Build 1st SF to know offsets on remote to send data */
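  /* A brief sketch of the idea: every rank exposes a single root counter, and the leaves are the ranks this
     rank sends to. The fetch-and-add below atomically reserves, for each destination, a contiguous slot range
     in that destination's receive space; offsets[k] returns the starting slot on rank sendto[k], and nroots2
     ends up holding the total number of entries this rank will receive. */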
6448   PetscSF      sf1;
6449   PetscInt     nroots = 1, nroots2 = 0;
6450   PetscInt     nleaves = nsend, nleaves2 = 0;
6451   PetscInt    *offsets;
6452   PetscSFNode *iremote;
6453 
6454   PetscCall(PetscSFCreate(comm, &sf1));
6455   PetscCall(PetscMalloc1(nsend, &iremote));
6456   PetscCall(PetscMalloc1(nsend, &offsets));
6457   for (k = 0; k < nsend; k++) {
6458     iremote[k].rank  = sendto[k];
6459     iremote[k].index = 0;
6460     nleaves2 += nentries[k];
6461     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6462   }
6463   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6464   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6465   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* If nroots2 were to overflow, the offsets[] check below would catch it */
6466   PetscCall(PetscSFDestroy(&sf1));
6467   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6468 
6469   /* Build 2nd SF to send remote COOs to their owner */
6470   PetscSF sf2;
6471   nroots  = nroots2;
6472   nleaves = nleaves2;
6473   PetscCall(PetscSFCreate(comm, &sf2));
6474   PetscCall(PetscSFSetFromOptions(sf2));
6475   PetscCall(PetscMalloc1(nleaves, &iremote));
6476   p = 0;
6477   for (k = 0; k < nsend; k++) {
6478     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6479     for (q = 0; q < nentries[k]; q++, p++) {
6480       iremote[p].rank = sendto[k];
6481       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6482     }
6483   }
6484   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6485 
6486   /* Send the remote COOs to their owner */
6487   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6488   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6489   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6490   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6491   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6492   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6493   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6494   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6495   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6496   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6497   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6498 
6499   PetscCall(PetscFree(offsets));
6500   PetscCall(PetscFree2(sendto, nentries));
6501 
6502   /* Sort received COOs by row along with the permutation array     */
6503   for (k = 0; k < n2; k++) perm2[k] = k;
6504   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6505 
6506   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6507   PetscCount *Cperm1;
6508   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6509   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6510   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6511   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6512 
6513   /* Support for HYPRE matrices, kind of a hack.
6514      Swap min column with diagonal so that diagonal values will go first */
6515   PetscBool hypre;
6516   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6517   if (hypre) {
6518     PetscInt *minj;
6519     PetscBT   hasdiag;
6520 
6521     PetscCall(PetscBTCreate(m, &hasdiag));
6522     PetscCall(PetscMalloc1(m, &minj));
6523     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6524     for (k = i1start; k < rem; k++) {
6525       if (j1[k] < cstart || j1[k] >= cend) continue;
6526       const PetscInt rindex = i1[k] - rstart;
6527       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6528       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6529     }
6530     for (k = 0; k < n2; k++) {
6531       if (j2[k] < cstart || j2[k] >= cend) continue;
6532       const PetscInt rindex = i2[k] - rstart;
6533       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6534       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6535     }
6536     for (k = i1start; k < rem; k++) {
6537       const PetscInt rindex = i1[k] - rstart;
6538       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6539       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6540       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6541     }
6542     for (k = 0; k < n2; k++) {
6543       const PetscInt rindex = i2[k] - rstart;
6544       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6545       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6546       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6547     }
6548     PetscCall(PetscBTDestroy(&hasdiag));
6549     PetscCall(PetscFree(minj));
6550   }
6551 
6552   /* Split local COOs and received COOs into diag/offdiag portions */
6553   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6554   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6555   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6556   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6557   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6558   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6559 
6560   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6561   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6562   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6563   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6564 
6565   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6566   PetscInt *Ai, *Bi;
6567   PetscInt *Aj, *Bj;
6568 
6569   PetscCall(PetscMalloc1(m + 1, &Ai));
6570   PetscCall(PetscMalloc1(m + 1, &Bi));
6571   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6572   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6573 
6574   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6575   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6576   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6577   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6578   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6579 
6580   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6581   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6582 
6583   /* Expand Ajmap1/Bjmap1 so they are based off the nonzeros in A/B, since we    */
6584   /* expect most nonzeros in A/B to have local contributing entries              */
6585   PetscInt    Annz = Ai[m];
6586   PetscInt    Bnnz = Bi[m];
6587   PetscCount *Ajmap1_new, *Bjmap1_new;
6588 
6589   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6590   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6591 
6592   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6593   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6594 
6595   PetscCall(PetscFree(Aimap1));
6596   PetscCall(PetscFree(Ajmap1));
6597   PetscCall(PetscFree(Bimap1));
6598   PetscCall(PetscFree(Bjmap1));
6599   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6600   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6601   PetscCall(PetscFree(perm1));
6602   PetscCall(PetscFree3(i2, j2, perm2));
6603 
6604   Ajmap1 = Ajmap1_new;
6605   Bjmap1 = Bjmap1_new;
6606 
6607   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6608   if (Annz < Annz1 + Annz2) {
6609     PetscInt *Aj_new;
6610     PetscCall(PetscMalloc1(Annz, &Aj_new));
6611     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6612     PetscCall(PetscFree(Aj));
6613     Aj = Aj_new;
6614   }
6615 
6616   if (Bnnz < Bnnz1 + Bnnz2) {
6617     PetscInt *Bj_new;
6618     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6619     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6620     PetscCall(PetscFree(Bj));
6621     Bj = Bj_new;
6622   }
6623 
6624   /* Create new submatrices for on-process and off-process coupling                  */
6625   PetscScalar     *Aa, *Ba;
6626   MatType          rtype;
6627   Mat_SeqAIJ      *a, *b;
6628   PetscObjectState state;
6629   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6630   PetscCall(PetscCalloc1(Bnnz, &Ba));
6631   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6632   if (cstart) {
6633     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6634   }
6635 
6636   PetscCall(MatGetRootType_Private(mat, &rtype));
6637 
6638   MatSeqXAIJGetOptions_Private(mpiaij->A);
6639   PetscCall(MatDestroy(&mpiaij->A));
6640   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6641   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6642   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6643 
6644   MatSeqXAIJGetOptions_Private(mpiaij->B);
6645   PetscCall(MatDestroy(&mpiaij->B));
6646   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6647   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6648   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6649 
6650   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6651   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6652   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6653   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6654 
6655   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6656   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6657   a->free_a  = PETSC_TRUE;
6658   a->free_ij = PETSC_TRUE;
6659   b->free_a  = PETSC_TRUE;
6660   b->free_ij = PETSC_TRUE;
6661   a->maxnz   = a->nz;
6662   b->maxnz   = b->nz;
6663 
6664   /* conversion must happen AFTER multiply setup */
6665   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6666   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6667   PetscCall(VecDestroy(&mpiaij->lvec));
6668   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6669 
6670   // Put the COO struct in a container and then attach that to the matrix
6671   PetscCall(PetscMalloc1(1, &coo));
6672   coo->n       = coo_n;
6673   coo->sf      = sf2;
6674   coo->sendlen = nleaves;
6675   coo->recvlen = nroots;
6676   coo->Annz    = Annz;
6677   coo->Bnnz    = Bnnz;
6678   coo->Annz2   = Annz2;
6679   coo->Bnnz2   = Bnnz2;
6680   coo->Atot1   = Atot1;
6681   coo->Atot2   = Atot2;
6682   coo->Btot1   = Btot1;
6683   coo->Btot2   = Btot2;
6684   coo->Ajmap1  = Ajmap1;
6685   coo->Aperm1  = Aperm1;
6686   coo->Bjmap1  = Bjmap1;
6687   coo->Bperm1  = Bperm1;
6688   coo->Aimap2  = Aimap2;
6689   coo->Ajmap2  = Ajmap2;
6690   coo->Aperm2  = Aperm2;
6691   coo->Bimap2  = Bimap2;
6692   coo->Bjmap2  = Bjmap2;
6693   coo->Bperm2  = Bperm2;
6694   coo->Cperm1  = Cperm1;
6695   // Allocate in preallocation. If not used, it has zero cost on host
6696   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6697   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6698   PetscCall(PetscContainerSetPointer(container, coo));
6699   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6700   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6701   PetscCall(PetscContainerDestroy(&container));
6702   PetscFunctionReturn(PETSC_SUCCESS);
6703 }
6704 
6705 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6706 {
6707   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6708   Mat                  A = mpiaij->A, B = mpiaij->B;
6709   PetscScalar         *Aa, *Ba;
6710   PetscScalar         *sendbuf, *recvbuf;
6711   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6712   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6713   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6714   const PetscCount    *Cperm1;
6715   PetscContainer       container;
6716   MatCOOStruct_MPIAIJ *coo;
6717 
6718   PetscFunctionBegin;
6719   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6720   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6721   PetscCall(PetscContainerGetPointer(container, &coo));
6722   sendbuf = coo->sendbuf;
6723   recvbuf = coo->recvbuf;
6724   Ajmap1  = coo->Ajmap1;
6725   Ajmap2  = coo->Ajmap2;
6726   Aimap2  = coo->Aimap2;
6727   Bjmap1  = coo->Bjmap1;
6728   Bjmap2  = coo->Bjmap2;
6729   Bimap2  = coo->Bimap2;
6730   Aperm1  = coo->Aperm1;
6731   Aperm2  = coo->Aperm2;
6732   Bperm1  = coo->Bperm1;
6733   Bperm2  = coo->Bperm2;
6734   Cperm1  = coo->Cperm1;
6735 
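  /* A sketch of how these maps are used below (suffix 1 = locally supplied entries, 2 = entries received from other ranks):
       - the i-th unique local nonzero of A sums v[Aperm1[k]] for k in [Ajmap1[i], Ajmap1[i+1])
       - the i-th received unique nonzero adds recvbuf[Aperm2[k]] for k in [Ajmap2[i], Ajmap2[i+1]) into Aa[Aimap2[i]]
     and similarly for B (the off-diagonal block). */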
6736   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6737   PetscCall(MatSeqAIJGetArray(B, &Ba));
6738 
6739   /* Pack entries to be sent to remote */
6740   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6741 
6742   /* Send remote entries to their owner and overlap the communication with local computation */
6743   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6744   /* Add local entries to A and B */
6745   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6746     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6747     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6748     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6749   }
6750   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6751     PetscScalar sum = 0.0;
6752     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6753     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6754   }
6755   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6756 
6757   /* Add received remote entries to A and B */
6758   for (PetscCount i = 0; i < coo->Annz2; i++) {
6759     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6760   }
6761   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6762     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6763   }
6764   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6765   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6766   PetscFunctionReturn(PETSC_SUCCESS);
6767 }
6768 
6769 /*MC
6770    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6771 
6772    Options Database Keys:
6773 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6774 
6775    Level: beginner
6776 
6777    Notes:
6778    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6779     in this case the values associated with the rows and columns one passes in are set to zero
6780     in the matrix.
6781 
6782     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6783     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6784 
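   Example Usage (a minimal sketch of typical creation; the preallocation numbers are placeholders):
.vb
   Mat A;
   MatCreate(comm, &A);
   MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(A, MATMPIAIJ);
   MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL); /* roughly 5 nonzeros per row in the diagonal block, 2 in the off-diagonal block */
   /* ... MatSetValues() calls ..., then MatAssemblyBegin()/MatAssemblyEnd() */
.ve
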
6785 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6786 M*/
6787 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6788 {
6789   Mat_MPIAIJ *b;
6790   PetscMPIInt size;
6791 
6792   PetscFunctionBegin;
6793   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6794 
6795   PetscCall(PetscNew(&b));
6796   B->data       = (void *)b;
6797   B->ops[0]     = MatOps_Values;
6798   B->assembled  = PETSC_FALSE;
6799   B->insertmode = NOT_SET_VALUES;
6800   b->size       = size;
6801 
6802   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6803 
6804   /* build cache for off array entries formed */
6805   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6806 
6807   b->donotstash  = PETSC_FALSE;
6808   b->colmap      = NULL;
6809   b->garray      = NULL;
6810   b->roworiented = PETSC_TRUE;
6811 
6812   /* stuff used for matrix vector multiply */
6813   b->lvec  = NULL;
6814   b->Mvctx = NULL;
6815 
6816   /* stuff for MatGetRow() */
6817   b->rowindices   = NULL;
6818   b->rowvalues    = NULL;
6819   b->getrowactive = PETSC_FALSE;
6820 
6821   /* flexible pointer used in CUSPARSE classes */
6822   b->spptr = NULL;
6823 
6824   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6825   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6826   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6827   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6828   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6829   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6830   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6831   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6832   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6833   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6834   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6835 #if defined(PETSC_HAVE_CUDA)
6836   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6837 #endif
6838 #if defined(PETSC_HAVE_HIP)
6839   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6840 #endif
6841 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6842   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6843 #endif
6844 #if defined(PETSC_HAVE_MKL_SPARSE)
6845   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6846 #endif
6847   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6848   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6849   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6850   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6851 #if defined(PETSC_HAVE_ELEMENTAL)
6852   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6853 #endif
6854 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
6855   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6856 #endif
6857   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6858   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6859 #if defined(PETSC_HAVE_HYPRE)
6860   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6861   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6862 #endif
6863   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6864   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6865   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6866   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6867   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6868   PetscFunctionReturn(PETSC_SUCCESS);
6869 }
6870 
6871 /*@
6872   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6873   and "off-diagonal" part of the matrix in CSR format.
6874 
6875   Collective
6876 
6877   Input Parameters:
6878 + comm - MPI communicator
6879 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6880 . n    - This value should be the same as the local size used in creating the
6881          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
6882          calculated if `N` is given). For square matrices `n` is almost always `m`.
6883 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6884 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6885 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6886 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6887 . a    - matrix values
6888 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6889 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6890 - oa   - matrix values
6891 
6892   Output Parameter:
6893 . mat - the matrix
6894 
6895   Level: advanced
6896 
6897   Notes:
6898   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6899   must free the arrays once the matrix has been destroyed and not before.
6900 
6901   The `i` and `j` indices are 0 based
6902 
6903   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6904 
6905   This sets local rows and cannot be used to set off-processor values.
6906 
6907   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6908   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6909   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6910   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6911   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6912   communication if it is known that only local entries will be set.
6913 
6914 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6915           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6916 @*/
6917 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6918 {
6919   Mat_MPIAIJ *maij;
6920 
6921   PetscFunctionBegin;
6922   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
6923   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6924   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6925   PetscCall(MatCreate(comm, mat));
6926   PetscCall(MatSetSizes(*mat, m, n, M, N));
6927   PetscCall(MatSetType(*mat, MATMPIAIJ));
6928   maij = (Mat_MPIAIJ *)(*mat)->data;
6929 
6930   (*mat)->preallocated = PETSC_TRUE;
6931 
6932   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6933   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6934 
6935   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6936   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6937 
6938   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6939   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6940   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6941   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6942   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6943   PetscFunctionReturn(PETSC_SUCCESS);
6944 }
6945 
6946 typedef struct {
6947   Mat       *mp;    /* intermediate products */
6948   PetscBool *mptmp; /* is the intermediate product temporary ? */
6949   PetscInt   cp;    /* number of intermediate products */
6950 
6951   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6952   PetscInt    *startsj_s, *startsj_r;
6953   PetscScalar *bufa;
6954   Mat          P_oth;
6955 
6956   /* may take advantage of merging product->B */
6957   Mat Bloc; /* B-local by merging diag and off-diag */
6958 
6959   /* cusparse does not support splitting between the symbolic and numeric phases.
6960      When api_user is true, we don't need to update the numerical values
6961      of the temporary storage */
6962   PetscBool reusesym;
6963 
6964   /* support for COO values insertion */
6965   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6966   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6967   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6968   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6969   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6970   PetscMemType mtype;
6971 
6972   /* customization */
6973   PetscBool abmerge;
6974   PetscBool P_oth_bind;
6975 } MatMatMPIAIJBACKEND;
6976 
6977 static PetscErrorCode MatProductCtxDestroy_MatMatMPIAIJBACKEND(PetscCtxRt data)
6978 {
6979   MatMatMPIAIJBACKEND *mmdata = *(MatMatMPIAIJBACKEND **)data;
6980   PetscInt             i;
6981 
6982   PetscFunctionBegin;
6983   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6984   PetscCall(PetscFree(mmdata->bufa));
6985   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6986   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6987   PetscCall(MatDestroy(&mmdata->P_oth));
6988   PetscCall(MatDestroy(&mmdata->Bloc));
6989   PetscCall(PetscSFDestroy(&mmdata->sf));
6990   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6991   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6992   PetscCall(PetscFree(mmdata->own[0]));
6993   PetscCall(PetscFree(mmdata->own));
6994   PetscCall(PetscFree(mmdata->off[0]));
6995   PetscCall(PetscFree(mmdata->off));
6996   PetscCall(PetscFree(mmdata));
6997   PetscFunctionReturn(PETSC_SUCCESS);
6998 }
6999 
7000 /* Copy n selected entries of A, whose indices are given in idx[], to v[].
7001    If idx is NULL, copy the whole data array of A to v[].
7002  */
7003 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7004 {
7005   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7006 
7007   PetscFunctionBegin;
7008   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7009   if (f) {
7010     PetscCall((*f)(A, n, idx, v));
7011   } else {
7012     const PetscScalar *vv;
7013 
7014     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7015     if (n && idx) {
7016       PetscScalar    *w  = v;
7017       const PetscInt *oi = idx;
7018       PetscInt        j;
7019 
7020       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7021     } else {
7022       PetscCall(PetscArraycpy(v, vv, n));
7023     }
7024     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7025   }
7026   PetscFunctionReturn(PETSC_SUCCESS);
7027 }
7028 
7029 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7030 {
7031   MatMatMPIAIJBACKEND *mmdata;
7032   PetscInt             i, n_d, n_o;
7033 
7034   PetscFunctionBegin;
7035   MatCheckProduct(C, 1);
7036   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7037   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7038   if (!mmdata->reusesym) { /* update temporary matrices */
7039     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7040     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7041   }
7042   mmdata->reusesym = PETSC_FALSE;
7043 
7044   for (i = 0; i < mmdata->cp; i++) {
7045     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7046     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7047   }
7048   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7049     PetscInt noff;
7050 
7051     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7052     if (mmdata->mptmp[i]) continue;
7053     if (noff) {
7054       PetscInt nown;
7055 
7056       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7057       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7058       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7059       n_o += noff;
7060       n_d += nown;
7061     } else {
7062       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7063 
7064       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7065       n_d += mm->nz;
7066     }
7067   }
7068   if (mmdata->hasoffproc) { /* offprocess insertion */
7069     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7070     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7071   }
7072   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7073   PetscFunctionReturn(PETSC_SUCCESS);
7074 }
7075 
7076 /* Support for Pt * A, A * P, or Pt * A * P */
7077 #define MAX_NUMBER_INTERMEDIATE 4
7078 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7079 {
7080   Mat_Product           *product = C->product;
7081   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7082   Mat_MPIAIJ            *a, *p;
7083   MatMatMPIAIJBACKEND   *mmdata;
7084   ISLocalToGlobalMapping P_oth_l2g = NULL;
7085   IS                     glob      = NULL;
7086   const char            *prefix;
7087   char                   pprefix[256];
7088   const PetscInt        *globidx, *P_oth_idx;
7089   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7090   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7091   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7092                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7093                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7094   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7095 
7096   MatProductType ptype;
7097   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7098   PetscMPIInt    size;
7099 
7100   PetscFunctionBegin;
7101   MatCheckProduct(C, 1);
7102   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7103   ptype = product->type;
7104   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7105     ptype                                          = MATPRODUCT_AB;
7106     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7107   }
7108   switch (ptype) {
7109   case MATPRODUCT_AB:
7110     A          = product->A;
7111     P          = product->B;
7112     m          = A->rmap->n;
7113     n          = P->cmap->n;
7114     M          = A->rmap->N;
7115     N          = P->cmap->N;
7116     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7117     break;
7118   case MATPRODUCT_AtB:
7119     P          = product->A;
7120     A          = product->B;
7121     m          = P->cmap->n;
7122     n          = A->cmap->n;
7123     M          = P->cmap->N;
7124     N          = A->cmap->N;
7125     hasoffproc = PETSC_TRUE;
7126     break;
7127   case MATPRODUCT_PtAP:
7128     A          = product->A;
7129     P          = product->B;
7130     m          = P->cmap->n;
7131     n          = P->cmap->n;
7132     M          = P->cmap->N;
7133     N          = P->cmap->N;
7134     hasoffproc = PETSC_TRUE;
7135     break;
7136   default:
7137     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7138   }
7139   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7140   if (size == 1) hasoffproc = PETSC_FALSE;
7141 
7142   /* defaults */
7143   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7144     mp[i]    = NULL;
7145     mptmp[i] = PETSC_FALSE;
7146     rmapt[i] = -1;
7147     cmapt[i] = -1;
7148     rmapa[i] = NULL;
7149     cmapa[i] = NULL;
7150   }
7151 
7152   /* customization */
7153   PetscCall(PetscNew(&mmdata));
7154   mmdata->reusesym = product->api_user;
7155   if (ptype == MATPRODUCT_AB) {
7156     if (product->api_user) {
7157       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7158       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7159       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7160       PetscOptionsEnd();
7161     } else {
7162       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7163       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7164       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7165       PetscOptionsEnd();
7166     }
7167   } else if (ptype == MATPRODUCT_PtAP) {
7168     if (product->api_user) {
7169       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7170       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7171       PetscOptionsEnd();
7172     } else {
7173       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7174       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7175       PetscOptionsEnd();
7176     }
7177   }
7178   a = (Mat_MPIAIJ *)A->data;
7179   p = (Mat_MPIAIJ *)P->data;
7180   PetscCall(MatSetSizes(C, m, n, M, N));
7181   PetscCall(PetscLayoutSetUp(C->rmap));
7182   PetscCall(PetscLayoutSetUp(C->cmap));
7183   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7184   PetscCall(MatGetOptionsPrefix(C, &prefix));
7185 
7186   cp = 0;
7187   switch (ptype) {
7188   case MATPRODUCT_AB: /* A * P */
7189     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
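    /* Explanatory sketch (added, not part of the original source): an MPIAIJ matrix stores its local rows as
       A = [A_d  A_o], where A_d (a->A) holds the owned columns and A_o (a->B) the off-process columns (compacted
       local column indices mapped to global ones through a->garray). C = A*P is therefore assembled from the
       sequential products
         A_d * P_loc  (P_loc = local rows of P, either merged into one SeqAIJ or kept split as P_diag and P_off)
         A_o * P_oth  (P_oth = rows of P owned by other ranks, gathered just above by MatGetBrowsOfAoCols_MPIAIJ)
       whose entries are merged into C through the COO assembly set up at the end of this routine. */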
7190 
7191     /* A_diag * P_local (merged or not) */
7192     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged into one matrix, then multiplied by A_diag */
7193       /* P is product->B */
7194       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7195       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7196       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7197       PetscCall(MatProductSetFill(mp[cp], product->fill));
7198       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7199       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7200       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7201       mp[cp]->product->api_user = product->api_user;
7202       PetscCall(MatProductSetFromOptions(mp[cp]));
7203       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7204       PetscCall(ISGetIndices(glob, &globidx));
7205       rmapt[cp] = 1;
7206       cmapt[cp] = 2;
7207       cmapa[cp] = globidx;
7208       mptmp[cp] = PETSC_FALSE;
7209       cp++;
7210     } else { /* A_diag * P_diag and A_diag * P_off */
7211       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7212       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7213       PetscCall(MatProductSetFill(mp[cp], product->fill));
7214       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7215       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7216       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7217       mp[cp]->product->api_user = product->api_user;
7218       PetscCall(MatProductSetFromOptions(mp[cp]));
7219       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7220       rmapt[cp] = 1;
7221       cmapt[cp] = 1;
7222       mptmp[cp] = PETSC_FALSE;
7223       cp++;
7224       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7225       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7226       PetscCall(MatProductSetFill(mp[cp], product->fill));
7227       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7228       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7229       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7230       mp[cp]->product->api_user = product->api_user;
7231       PetscCall(MatProductSetFromOptions(mp[cp]));
7232       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7233       rmapt[cp] = 1;
7234       cmapt[cp] = 2;
7235       cmapa[cp] = p->garray;
7236       mptmp[cp] = PETSC_FALSE;
7237       cp++;
7238     }
7239 
7240     /* A_off * P_other */
7241     if (mmdata->P_oth) {
7242       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7243       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7244       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7245       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7246       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7247       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7248       PetscCall(MatProductSetFill(mp[cp], product->fill));
7249       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7250       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7251       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7252       mp[cp]->product->api_user = product->api_user;
7253       PetscCall(MatProductSetFromOptions(mp[cp]));
7254       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7255       rmapt[cp] = 1;
7256       cmapt[cp] = 2;
7257       cmapa[cp] = P_oth_idx;
7258       mptmp[cp] = PETSC_FALSE;
7259       cp++;
7260     }
7261     break;
7262 
7263   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
7264     /* A is product->B */
7265     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7266     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7267       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7268       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7269       PetscCall(MatProductSetFill(mp[cp], product->fill));
7270       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7271       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7272       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7273       mp[cp]->product->api_user = product->api_user;
7274       PetscCall(MatProductSetFromOptions(mp[cp]));
7275       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7276       PetscCall(ISGetIndices(glob, &globidx));
7277       rmapt[cp] = 2;
7278       rmapa[cp] = globidx;
7279       cmapt[cp] = 2;
7280       cmapa[cp] = globidx;
7281       mptmp[cp] = PETSC_FALSE;
7282       cp++;
7283     } else {
7284       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7285       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7286       PetscCall(MatProductSetFill(mp[cp], product->fill));
7287       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7288       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7289       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7290       mp[cp]->product->api_user = product->api_user;
7291       PetscCall(MatProductSetFromOptions(mp[cp]));
7292       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7293       PetscCall(ISGetIndices(glob, &globidx));
7294       rmapt[cp] = 1;
7295       cmapt[cp] = 2;
7296       cmapa[cp] = globidx;
7297       mptmp[cp] = PETSC_FALSE;
7298       cp++;
7299       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7300       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7301       PetscCall(MatProductSetFill(mp[cp], product->fill));
7302       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7303       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7304       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7305       mp[cp]->product->api_user = product->api_user;
7306       PetscCall(MatProductSetFromOptions(mp[cp]));
7307       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7308       rmapt[cp] = 2;
7309       rmapa[cp] = p->garray;
7310       cmapt[cp] = 2;
7311       cmapa[cp] = globidx;
7312       mptmp[cp] = PETSC_FALSE;
7313       cp++;
7314     }
7315     break;
7316   case MATPRODUCT_PtAP:
7317     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7318     /* P is product->B */
7319     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7320     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7321     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7322     PetscCall(MatProductSetFill(mp[cp], product->fill));
7323     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7324     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7325     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7326     mp[cp]->product->api_user = product->api_user;
7327     PetscCall(MatProductSetFromOptions(mp[cp]));
7328     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7329     PetscCall(ISGetIndices(glob, &globidx));
7330     rmapt[cp] = 2;
7331     rmapa[cp] = globidx;
7332     cmapt[cp] = 2;
7333     cmapa[cp] = globidx;
7334     mptmp[cp] = PETSC_FALSE;
7335     cp++;
7336     if (mmdata->P_oth) {
7337       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7338       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7339       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7340       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7341       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7342       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7343       PetscCall(MatProductSetFill(mp[cp], product->fill));
7344       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7345       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7346       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7347       mp[cp]->product->api_user = product->api_user;
7348       PetscCall(MatProductSetFromOptions(mp[cp]));
7349       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7350       mptmp[cp] = PETSC_TRUE;
7351       cp++;
7352       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7353       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7354       PetscCall(MatProductSetFill(mp[cp], product->fill));
7355       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7356       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7357       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7358       mp[cp]->product->api_user = product->api_user;
7359       PetscCall(MatProductSetFromOptions(mp[cp]));
7360       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7361       rmapt[cp] = 2;
7362       rmapa[cp] = globidx;
7363       cmapt[cp] = 2;
7364       cmapa[cp] = P_oth_idx;
7365       mptmp[cp] = PETSC_FALSE;
7366       cp++;
7367     }
7368     break;
7369   default:
7370     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7371   }
7372   /* sanity check */
7373   if (size > 1)
7374     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7375 
7376   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7377   for (i = 0; i < cp; i++) {
7378     mmdata->mp[i]    = mp[i];
7379     mmdata->mptmp[i] = mptmp[i];
7380   }
7381   mmdata->cp             = cp;
7382   C->product->data       = mmdata;
7383   C->product->destroy    = MatProductCtxDestroy_MatMatMPIAIJBACKEND;
7384   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7385 
7386   /* memory type */
7387   mmdata->mtype = PETSC_MEMTYPE_HOST;
7388   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7389   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7390   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7391   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7392   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7393   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
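  /* Note (added): mmdata->mtype only records the memory space of the backend so that PetscSFMalloc below places the
     COO value buffers (coo_w and coo_v) in the same space (CUDA, HIP, Kokkos, or host). */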
7394 
7395   /* prepare coo coordinates for values insertion */
7396 
7397   /* count total nonzeros of those intermediate seqaij Mats
7398     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7399     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7400     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7401   */
7402   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7403     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7404     if (mptmp[cp]) continue;
7405     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7406       const PetscInt *rmap = rmapa[cp];
7407       const PetscInt  mr   = mp[cp]->rmap->n;
7408       const PetscInt  rs   = C->rmap->rstart;
7409       const PetscInt  re   = C->rmap->rend;
7410       const PetscInt *ii   = mm->i;
7411       for (i = 0; i < mr; i++) {
7412         const PetscInt gr = rmap[i];
7413         const PetscInt nz = ii[i + 1] - ii[i];
7414         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7415         else ncoo_oown += nz;                  /* this row is local */
7416       }
7417     } else ncoo_d += mm->nz;
7418   }
7419 
7420   /*
7421     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7422 
7423     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on me by other procs.
7424 
7425     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7426 
7427     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other procs
7428     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7429     Thus off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other procs.
7430 
7431     coo_i/j/v[]: arrays of length ncoo holding the row/col/val of nonzeros belonging to this proc.
7432     E.g., coo_i[]: the leading part (of size ncoo_d + ncoo_oown) stores the row indices of locally inserted nonzeros; the remaining part stores the row indices of nonzeros this proc will receive.
7433   */
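  /* Illustrative example (added, assuming two contributing products with sparse row maps): if mp[0] has 5 offproc and
     3 locally owned nonzeros and mp[1] has 2 offproc and 4 locally owned ones, then off[0] is an array of length
     ncoo_o = 7 with off[1] = off[0] + 5 and off[2] = off[0] + 7, while own[0] has length ncoo_oown = 7 with
     own[1] = own[0] + 3 and own[2] = own[0] + 7, i.e. a CSR-like layout of offsets over the products. */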
7434   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7435   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7436 
7437   /* gather (i,j) of nonzeros inserted by remote procs */
7438   if (hasoffproc) {
7439     PetscSF  msf;
7440     PetscInt ncoo2, *coo_i2, *coo_j2;
7441 
7442     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7443     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7444     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7445 
7446     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7447       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7448       PetscInt   *idxoff = mmdata->off[cp];
7449       PetscInt   *idxown = mmdata->own[cp];
7450       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7451         const PetscInt *rmap = rmapa[cp];
7452         const PetscInt *cmap = cmapa[cp];
7453         const PetscInt *ii   = mm->i;
7454         PetscInt       *coi  = coo_i + ncoo_o;
7455         PetscInt       *coj  = coo_j + ncoo_o;
7456         const PetscInt  mr   = mp[cp]->rmap->n;
7457         const PetscInt  rs   = C->rmap->rstart;
7458         const PetscInt  re   = C->rmap->rend;
7459         const PetscInt  cs   = C->cmap->rstart;
7460         for (i = 0; i < mr; i++) {
7461           const PetscInt *jj = mm->j + ii[i];
7462           const PetscInt  gr = rmap[i];
7463           const PetscInt  nz = ii[i + 1] - ii[i];
7464           if (gr < rs || gr >= re) { /* this is an offproc row */
7465             for (j = ii[i]; j < ii[i + 1]; j++) {
7466               *coi++    = gr;
7467               *idxoff++ = j;
7468             }
7469             if (!cmapt[cp]) { /* already global */
7470               for (j = 0; j < nz; j++) *coj++ = jj[j];
7471             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7472               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7473             } else { /* offdiag */
7474               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7475             }
7476             ncoo_o += nz;
7477           } else { /* this is a local row */
7478             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7479           }
7480         }
7481       }
7482       mmdata->off[cp + 1] = idxoff;
7483       mmdata->own[cp + 1] = idxown;
7484     }
7485 
7486     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7487     PetscInt incoo_o;
7488     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7489     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7490     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7491     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7492     ncoo = ncoo_d + ncoo_oown + ncoo2;
7493     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7494     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7495     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7496     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7497     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7498     PetscCall(PetscFree2(coo_i, coo_j));
7499     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7500     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7501     coo_i = coo_i2;
7502     coo_j = coo_j2;
7503   } else { /* no offproc values insertion */
7504     ncoo = ncoo_d;
7505     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7506 
7507     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7508     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7509     PetscCall(PetscSFSetUp(mmdata->sf));
7510   }
7511   mmdata->hasoffproc = hasoffproc;
7512 
7513   /* gather (i,j) of nonzeros inserted locally */
7514   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7515     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7516     PetscInt       *coi  = coo_i + ncoo_d;
7517     PetscInt       *coj  = coo_j + ncoo_d;
7518     const PetscInt *jj   = mm->j;
7519     const PetscInt *ii   = mm->i;
7520     const PetscInt *cmap = cmapa[cp];
7521     const PetscInt *rmap = rmapa[cp];
7522     const PetscInt  mr   = mp[cp]->rmap->n;
7523     const PetscInt  rs   = C->rmap->rstart;
7524     const PetscInt  re   = C->rmap->rend;
7525     const PetscInt  cs   = C->cmap->rstart;
7526 
7527     if (mptmp[cp]) continue;
7528     if (rmapt[cp] == 1) { /* consecutive rows */
7529       /* fill coo_i */
7530       for (i = 0; i < mr; i++) {
7531         const PetscInt gr = i + rs;
7532         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7533       }
7534       /* fill coo_j */
7535       if (!cmapt[cp]) { /* type-0, already global */
7536         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7537       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7538         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7539       } else {                                            /* type-2, local to global for sparse columns */
7540         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7541       }
7542       ncoo_d += mm->nz;
7543     } else if (rmapt[cp] == 2) { /* sparse rows */
7544       for (i = 0; i < mr; i++) {
7545         const PetscInt *jj = mm->j + ii[i];
7546         const PetscInt  gr = rmap[i];
7547         const PetscInt  nz = ii[i + 1] - ii[i];
7548         if (gr >= rs && gr < re) { /* local rows */
7549           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7550           if (!cmapt[cp]) { /* type-0, already global */
7551             for (j = 0; j < nz; j++) *coj++ = jj[j];
7552           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7553             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7554           } else { /* type-2, local to global for sparse columns */
7555             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7556           }
7557           ncoo_d += nz;
7558         }
7559       }
7560     }
7561   }
7562   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7563   PetscCall(ISDestroy(&glob));
7564   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7565   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7566   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7567   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7568 
7569   /* set block sizes */
7570   A = product->A;
7571   P = product->B;
7572   switch (ptype) {
7573   case MATPRODUCT_PtAP:
7574     PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7575     break;
7576   case MATPRODUCT_RARt:
7577     PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7578     break;
7579   case MATPRODUCT_ABC:
7580     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7581     break;
7582   case MATPRODUCT_AB:
7583     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7584     break;
7585   case MATPRODUCT_AtB:
7586     PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7587     break;
7588   case MATPRODUCT_ABt:
7589     PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7590     break;
7591   default:
7592     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7593   }
7594 
7595   /* preallocate with COO data */
7596   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7597   PetscCall(PetscFree2(coo_i, coo_j));
7598   PetscFunctionReturn(PETSC_SUCCESS);
7599 }
7600 
7601 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7602 {
7603   Mat_Product *product = mat->product;
7604 #if defined(PETSC_HAVE_DEVICE)
7605   PetscBool match  = PETSC_FALSE;
7606   PetscBool usecpu = PETSC_FALSE;
7607 #else
7608   PetscBool match = PETSC_TRUE;
7609 #endif
7610 
7611   PetscFunctionBegin;
7612   MatCheckProduct(mat, 1);
7613 #if defined(PETSC_HAVE_DEVICE)
7614   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7615   if (match) { /* we can always fall back to the CPU if requested */
7616     switch (product->type) {
7617     case MATPRODUCT_AB:
7618       if (product->api_user) {
7619         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7620         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7621         PetscOptionsEnd();
7622       } else {
7623         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7624         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7625         PetscOptionsEnd();
7626       }
7627       break;
7628     case MATPRODUCT_AtB:
7629       if (product->api_user) {
7630         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7631         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7632         PetscOptionsEnd();
7633       } else {
7634         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7635         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7636         PetscOptionsEnd();
7637       }
7638       break;
7639     case MATPRODUCT_PtAP:
7640       if (product->api_user) {
7641         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7642         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7643         PetscOptionsEnd();
7644       } else {
7645         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7646         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7647         PetscOptionsEnd();
7648       }
7649       break;
7650     default:
7651       break;
7652     }
7653     match = (PetscBool)!usecpu;
7654   }
7655 #endif
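  /* Usage note (added): the backend symbolic phase selected below can be skipped at runtime, e.g. with
       -matmatmult_backend_cpu (or -mat_product_algorithm_backend_cpu when using the MatProduct API),
     which leaves mat->ops->productsymbolic unset here so that the plain MPIAIJ implementation is used instead. */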
7656   if (match) {
7657     switch (product->type) {
7658     case MATPRODUCT_AB:
7659     case MATPRODUCT_AtB:
7660     case MATPRODUCT_PtAP:
7661       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7662       break;
7663     default:
7664       break;
7665     }
7666   }
7667   /* fallback to MPIAIJ ops */
7668   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7669   PetscFunctionReturn(PETSC_SUCCESS);
7670 }
7671 
7672 /*
7673    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7674 
7675    n - the number of block indices in cc[]
7676    cc - the block indices (must be large enough to contain the indices)
7677 */
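/* Worked example (added): with bs = 2 and a row whose (sorted) column indices are {0, 1, 4, 5, 9}, the collapsed
   block indices are cc[] = {0, 2, 4} and *n = 3; an empty row gives *n = 0. */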
7678 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7679 {
7680   PetscInt        cnt = -1, nidx, j;
7681   const PetscInt *idx;
7682 
7683   PetscFunctionBegin;
7684   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7685   if (nidx) {
7686     cnt     = 0;
7687     cc[cnt] = idx[0] / bs;
7688     for (j = 1; j < nidx; j++) {
7689       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7690     }
7691   }
7692   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7693   *n = cnt + 1;
7694   PetscFunctionReturn(PETSC_SUCCESS);
7695 }
7696 
7697 /*
7698     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7699 
7700     ncollapsed - the number of block indices
7701     collapsed - the block indices (must be large enough to contain the indices)
7702 */
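/* Note (added): w0, w1, and w2 are caller-provided scratch arrays, each large enough to hold a fully collapsed block
   row; the merge loop below ping-pongs the running result between two of them, so on return *collapsed points into
   one of these buffers and must be consumed before the scratch space is reused or freed. */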
7703 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7704 {
7705   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7706 
7707   PetscFunctionBegin;
7708   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7709   for (i = start + 1; i < start + bs; i++) {
7710     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7711     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7712     cprevtmp = cprev;
7713     cprev    = merged;
7714     merged   = cprevtmp;
7715   }
7716   *ncollapsed = nprev;
7717   if (collapsed) *collapsed = cprev;
7718   PetscFunctionReturn(PETSC_SUCCESS);
7719 }
7720 
7721 /*
7722  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7723 
7724  Input Parameters:
7725 + Amat - matrix
7726 . symmetrize - make the result symmetric
7727 - scale - scale with diagonal
7728 
7729  Output Parameter:
7730 . a_Gmat - output scalar graph (all entries >= 0)
7731 
7732 */
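/* Illustrative usage (added; the call site below is hypothetical): build a symmetrized, diagonally scaled graph from
   a blocked matrix A, dropping entries below 0.01 and using all dofs of each block:

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 0.01, 0, NULL, &G));
     ...
     PetscCall(MatDestroy(&G));
*/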
7733 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7734 {
7735   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7736   MPI_Comm  comm;
7737   Mat       Gmat;
7738   PetscBool ismpiaij, isseqaij;
7739   Mat       a, b, c;
7740   MatType   jtype;
7741 
7742   PetscFunctionBegin;
7743   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7744   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7745   PetscCall(MatGetSize(Amat, &MM, &NN));
7746   PetscCall(MatGetBlockSize(Amat, &bs));
7747   nloc = (Iend - Istart) / bs;
7748 
7749   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7750   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7751   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7752 
7753   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7754   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, so that each class can provide a fast
7755      implementation */
7756   if (bs > 1) {
7757     PetscCall(MatGetType(Amat, &jtype));
7758     PetscCall(MatCreate(comm, &Gmat));
7759     PetscCall(MatSetType(Gmat, jtype));
7760     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7761     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7762     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7763       PetscInt  *d_nnz, *o_nnz;
7764       MatScalar *aa, val, *AA;
7765       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7766 
7767       if (isseqaij) {
7768         a = Amat;
7769         b = NULL;
7770       } else {
7771         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7772         a             = d->A;
7773         b             = d->B;
7774       }
7775       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7776       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7777       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7778         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7779         const PetscInt *cols1, *cols2;
7780 
7781         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7782           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7783           nnz[brow / bs] = nc2 / bs;
7784           if (nc2 % bs) ok = 0;
7785           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7786           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7787             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7788             if (nc1 != nc2) ok = 0;
7789             else {
7790               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7791                 if (cols1[jj] != cols2[jj]) ok = 0;
7792                 if (cols1[jj] % bs != jj % bs) ok = 0;
7793               }
7794             }
7795             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7796           }
7797           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7798           if (!ok) {
7799             PetscCall(PetscFree2(d_nnz, o_nnz));
7800             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7801             goto old_bs;
7802           }
7803         }
7804       }
7805       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7806       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7807       PetscCall(PetscFree2(d_nnz, o_nnz));
7808       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7809       // diag
7810       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7811         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7812 
7813         ai = aseq->i;
7814         n  = ai[brow + 1] - ai[brow];
7815         aj = aseq->j + ai[brow];
7816         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7817           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7818           val        = 0;
7819           if (index_size == 0) {
7820             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7821               aa = aseq->a + ai[brow + ii] + k;
7822               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7823                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7824               }
7825             }
7826           } else {                                            // use (index,index) value if provided
7827             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7828               PetscInt ii = index[iii];
7829               aa          = aseq->a + ai[brow + ii] + k;
7830               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7831                 PetscInt jj = index[jjj];
7832                 val += PetscAbs(PetscRealPart(aa[jj]));
7833               }
7834             }
7835           }
7836           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7837           AA[k / bs] = val;
7838         }
7839         grow = Istart / bs + brow / bs;
7840         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7841       }
7842       // off-diag
7843       if (ismpiaij) {
7844         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7845         const PetscScalar *vals;
7846         const PetscInt    *cols, *garray = aij->garray;
7847 
7848         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7849         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7850           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7851           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7852             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7853             AA[k / bs] = 0;
7854             AJ[cidx]   = garray[cols[k]] / bs;
7855           }
7856           nc = ncols / bs;
7857           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7858           if (index_size == 0) {
7859             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7860               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7861               for (PetscInt k = 0; k < ncols; k += bs) {
7862                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7863                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7864                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7865                 }
7866               }
7867               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7868             }
7869           } else {                                            // use (index,index) value if provided
7870             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7871               PetscInt ii = index[iii];
7872               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7873               for (PetscInt k = 0; k < ncols; k += bs) {
7874                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7875                   PetscInt jj = index[jjj];
7876                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7877                 }
7878               }
7879               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7880             }
7881           }
7882           grow = Istart / bs + brow / bs;
7883           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7884         }
7885       }
7886       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7887       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7888       PetscCall(PetscFree2(AA, AJ));
7889     } else {
7890       const PetscScalar *vals;
7891       const PetscInt    *idx;
7892       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7893     old_bs:
7894       /*
7895        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7896        */
7897       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7898       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7899       if (isseqaij) {
7900         PetscInt max_d_nnz;
7901 
7902         /*
7903          Determine exact preallocation count for (sequential) scalar matrix
7904          */
7905         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7906         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7907         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7908         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7909         PetscCall(PetscFree3(w0, w1, w2));
7910       } else if (ismpiaij) {
7911         Mat             Daij, Oaij;
7912         const PetscInt *garray;
7913         PetscInt        max_d_nnz;
7914 
7915         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7916         /*
7917          Determine exact preallocation count for diagonal block portion of scalar matrix
7918          */
7919         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7920         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7921         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7922         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7923         PetscCall(PetscFree3(w0, w1, w2));
7924         /*
7925          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7926          */
7927         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7928           o_nnz[jj] = 0;
7929           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7930             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7931             o_nnz[jj] += ncols;
7932             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7933           }
7934           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7935         }
7936       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7937       /* get scalar copy (norms) of matrix */
7938       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7939       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7940       PetscCall(PetscFree2(d_nnz, o_nnz));
7941       for (Ii = Istart; Ii < Iend; Ii++) {
7942         PetscInt dest_row = Ii / bs;
7943 
7944         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7945         for (jj = 0; jj < ncols; jj++) {
7946           PetscInt    dest_col = idx[jj] / bs;
7947           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7948 
7949           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7950         }
7951         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7952       }
7953       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7954       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7955     }
7956   } else {
7957     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7958     else {
7959       Gmat = Amat;
7960       PetscCall(PetscObjectReference((PetscObject)Gmat));
7961     }
7962     if (isseqaij) {
7963       a = Gmat;
7964       b = NULL;
7965     } else {
7966       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7967       a             = d->A;
7968       b             = d->B;
7969     }
7970     if (filter >= 0 || scale) {
7971       /* take absolute value of each entry */
7972       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7973         MatInfo      info;
7974         PetscScalar *avals;
7975 
7976         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7977         PetscCall(MatSeqAIJGetArray(c, &avals));
7978         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7979         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7980       }
7981     }
7982   }
7983   if (symmetrize) {
7984     PetscBool isset, issym;
7985 
7986     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7987     if (!isset || !issym) {
7988       Mat matTrans;
7989 
7990       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7991       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7992       PetscCall(MatDestroy(&matTrans));
7993     }
7994     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7995   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7996   if (scale) {
7997     /* scale Gmat so that all diagonal values become 1 or -1 */
7998     Vec diag;
7999 
8000     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8001     PetscCall(MatGetDiagonal(Gmat, diag));
8002     PetscCall(VecReciprocal(diag));
8003     PetscCall(VecSqrtAbs(diag));
8004     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8005     PetscCall(VecDestroy(&diag));
8006   }
8007   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8008   if (filter >= 0) {
8009     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8010     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8011   }
8012   *a_Gmat = Gmat;
8013   PetscFunctionReturn(PETSC_SUCCESS);
8014 }
8015 
8016 PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype)
8017 {
8018   Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
8019   PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST;
8020 
8021   PetscFunctionBegin;
8022   if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD));
8023   if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO));
8024   *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST;
8025   PetscFunctionReturn(PETSC_SUCCESS);
8026 }
8027 
8028 /*
8029     Special version for direct calls from Fortran
8030 */
8031 
8032 /* Change these macros so they can be used in a void function */
8033 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8034 #undef PetscCall
8035 #define PetscCall(...) \
8036   do { \
8037     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8038     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8039       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8040       return; \
8041     } \
8042   } while (0)
8043 
8044 #undef SETERRQ
8045 #define SETERRQ(comm, ierr, ...) \
8046   do { \
8047     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8048     return; \
8049   } while (0)
8050 
8051 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8052   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8053 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8054   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8055 #else
8056 #endif
8057 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8058 {
8059   Mat         mat = *mmat;
8060   PetscInt    m = *mm, n = *mn;
8061   InsertMode  addv = *maddv;
8062   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8063   PetscScalar value;
8064 
8065   MatCheckPreallocated(mat, 1);
8066   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8067   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8068   {
8069     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8070     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8071     PetscBool roworiented = aij->roworiented;
8072 
8073     /* Some Variables required in the macro */
8074     Mat         A     = aij->A;
8075     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8076     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8077     MatScalar  *aa;
8078     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8079     Mat         B                 = aij->B;
8080     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8081     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8082     MatScalar  *ba;
8083     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8084      * cannot use "#if defined" inside a macro. */
8085     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8086 
8087     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8088     PetscInt   nonew = a->nonew;
8089     MatScalar *ap1, *ap2;
8090 
8091     PetscFunctionBegin;
8092     PetscCall(MatSeqAIJGetArray(A, &aa));
8093     PetscCall(MatSeqAIJGetArray(B, &ba));
8094     for (i = 0; i < m; i++) {
8095       if (im[i] < 0) continue;
8096       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8097       if (im[i] >= rstart && im[i] < rend) {
8098         row      = im[i] - rstart;
8099         lastcol1 = -1;
8100         rp1      = aj + ai[row];
8101         ap1      = aa + ai[row];
8102         rmax1    = aimax[row];
8103         nrow1    = ailen[row];
8104         low1     = 0;
8105         high1    = nrow1;
8106         lastcol2 = -1;
8107         rp2      = bj + bi[row];
8108         ap2      = ba + bi[row];
8109         rmax2    = bimax[row];
8110         nrow2    = bilen[row];
8111         low2     = 0;
8112         high2    = nrow2;
8113 
8114         for (j = 0; j < n; j++) {
8115           if (roworiented) value = v[i * n + j];
8116           else value = v[i + j * m];
8117           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8118           if (in[j] >= cstart && in[j] < cend) {
8119             col = in[j] - cstart;
8120             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8121           } else if (in[j] < 0) continue;
8122           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8123             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8124           } else {
8125             if (mat->was_assembled) {
8126               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8127 #if defined(PETSC_USE_CTABLE)
8128               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8129               col--;
8130 #else
8131               col = aij->colmap[in[j]] - 1;
8132 #endif
8133               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8134                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8135                 col = in[j];
8136                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8137                 B        = aij->B;
8138                 b        = (Mat_SeqAIJ *)B->data;
8139                 bimax    = b->imax;
8140                 bi       = b->i;
8141                 bilen    = b->ilen;
8142                 bj       = b->j;
8143                 rp2      = bj + bi[row];
8144                 ap2      = ba + bi[row];
8145                 rmax2    = bimax[row];
8146                 nrow2    = bilen[row];
8147                 low2     = 0;
8148                 high2    = nrow2;
8149                 bm       = aij->B->rmap->n;
8150                 ba       = b->a;
8151                 inserted = PETSC_FALSE;
8152               }
8153             } else col = in[j];
8154             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8155           }
8156         }
8157       } else if (!aij->donotstash) {
8158         if (roworiented) {
8159           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8160         } else {
8161           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8162         }
8163       }
8164     }
8165     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8166     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8167   }
8168   PetscFunctionReturnVoid();
8169 }
8170 
8171 /* Undefining these here since they were redefined from their original definition above! No
8172  * other PETSc functions should be defined past this point, as it is impossible to recover the
8173  * original definitions */
8174 #undef PetscCall
8175 #undef SETERRQ
8176