#include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
#include <../src/mat/utils/freespace.h>

/*@
  MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix

  Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

  Input Parameter:
. A - the `MATMAIJ` matrix

  Output Parameter:
. B - the `MATAIJ` matrix

  Level: advanced

  Note:
  The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
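
  Example Usage:
  A minimal sketch, assuming `P` is an existing `MATMAIJ` matrix:
.vb
  Mat aij;

  PetscCall(MatMAIJGetAIJ(P, &aij));
  // use aij here, but do not destroy it: no reference was taken
.ve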

.seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
@*/
PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
{
  PetscBool ismpimaij, isseqmaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
  if (ismpimaij) {
    Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

    *B = b->A;
  } else if (isseqmaij) {
    Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

    *B = b->AIJ;
  } else {
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size

  Logically Collective

  Input Parameters:
+ A - the `MATMAIJ` matrix
- dof - the block size for the new matrix

  Output Parameter:
. B - the new `MATMAIJ` matrix

  Level: advanced
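
  Example Usage:
  A minimal sketch, assuming `P` is an existing `MATMAIJ` matrix:
.vb
  Mat P5;

  PetscCall(MatMAIJRedimension(P, 5, &P5)); // same blockwise action as P, but block size 5
  PetscCall(MatDestroy(&P5));
.ve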

.seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
@*/
PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
{
  Mat Aij = NULL;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveInt(A, dof, 2);
  PetscCall(MatMAIJGetAIJ(A, &Aij));
  PetscCall(MatCreateMAIJ(Aij, dof, B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
{
  Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetUp_MAIJ(Mat A)
{
  PetscFunctionBegin;
  SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
}

static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatView(B, viewer));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatView(B, viewer));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
  multicomponent problems, interpolating or restricting each component the same way independently.
  The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.

  Operations provided:
.vb
    MatMult()
    MatMultTranspose()
    MatMultAdd()
    MatMultTransposeAdd()
.ve
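
  Example Usage:
  A minimal sketch, assuming `interp` is an existing `MATAIJ` interpolation matrix for a single
  component and `dof` is the number of components per node:
.vb
  Mat maij;

  PetscCall(MatCreateMAIJ(interp, dof, &maij));
  PetscCall(MatMult(maij, coarse, fine)); // interpolates all dof components at once
  PetscCall(MatDestroy(&maij));
.ve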

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
{
  Mat_MPIMAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  A->ops->setup = MatSetUp_MAIJ;

  b->AIJ  = NULL;
  b->dof  = 0;
  b->OAIJ = NULL;
  b->ctx  = NULL;
  b->w    = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
  }
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscHasAttribute(always_inline)
  #define PETSC_FORCE_INLINE __attribute__((always_inline))
#else
  #define PETSC_FORCE_INLINE
#endif

#if defined(__clang__)
  #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
#else
  #define PETSC_PRAGMA_UNROLL
#endif

enum {
  MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
};

// try as hard as possible to get these "template"s inlined; GCC apparently does take the 'inline'
// keyword into account for these...
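// Computes zz = (A (x) I_N) xx when yy == NULL, and zz = yy + (A (x) I_N) xx otherwise, where A is
// the underlying SeqAIJ matrix, I_N the N x N identity, and (x) the Kronecker product: each AIJ
// entry scales an N-component block of xx.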
PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
  const Mat          baij       = b->AIJ;
  const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m          = baij->rmap->n;
  const PetscInt     nz         = a->nz;
  const PetscInt    *idx        = a->j;
  const PetscInt    *ii         = a->i;
  const PetscScalar *v          = a->a;
  PetscInt           nonzerorow = 0;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecGetArray(zz, &z));
  } else {
    PetscCall(VecGetArrayWrite(zz, &z));
  }

  for (PetscInt i = 0; i < m; ++i) {
    PetscInt       jrow = ii[i];
    const PetscInt n    = ii[i + 1] - jrow;
    // leave a line so clang-format does not align these decls
    PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};

    nonzerorow += n > 0;
    for (PetscInt j = 0; j < n; ++j, ++jrow) {
      const PetscScalar v_jrow     = v[jrow];
      const PetscInt    N_idx_jrow = N * idx[jrow];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
    }

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) {
      const PetscInt z_idx = N * i + k;

      if (mult_add) {
        z[z_idx] += sum[k];
      } else {
        z[z_idx] = sum[k];
      }
    }
  }
  PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
  PetscCall(VecRestoreArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecRestoreArray(zz, &z));
  } else {
    PetscCall(VecRestoreArrayWrite(zz, &z));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

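// Transpose analogue of the routine above: computes zz = (A^T (x) I_N) xx, optionally added to yy,
// by scattering each row of A into the output rather than gathering.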
PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
  const Mat          baij     = b->AIJ;
  const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m        = baij->rmap->n;
  const PetscInt     nz       = a->nz;
  const PetscInt    *a_j      = a->j;
  const PetscInt    *a_i      = a->i;
  const PetscScalar *a_a      = a->a;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add) {
    if (yy != zz) PetscCall(VecCopy(yy, zz));
  } else {
    PetscCall(VecSet(zz, 0.0));
  }
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &z));

  for (PetscInt i = 0; i < m; i++) {
    const PetscInt     a_ii = a_i[i];
    const PetscInt    *idx  = PetscSafePointerPlusOffset(a_j, a_ii);
    const PetscScalar *v    = PetscSafePointerPlusOffset(a_a, a_ii);
    const PetscInt     n    = a_i[i + 1] - a_ii;
    PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
    for (PetscInt j = 0; j < n; ++j) {
      const PetscInt    N_idx_j = N * idx[j];
      const PetscScalar v_j     = v[j];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
    }
  }

  PetscCall(PetscLogFlops(2 * N * nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}

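// Instantiates the four matrix-vector kernels (Mult, MultTranspose, MultAdd, MultTransposeAdd)
// for a fixed block size N, so the compiler can fully unroll the inner loops over components.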
#define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
  static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  }

MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)

#undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE

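// Generic fallback kernels for block sizes without a specialized instantiation above; the inner
// loop over 'dof' components is a runtime loop instead of a compile-time constant.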
static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  for (i = 0; i < m; i++) {
    idx   = PetscSafePointerPlusOffset(a->j, a->i[i]);
    v     = PetscSafePointerPlusOffset(a->a, a->i[i]);
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  for (i = 0; i < m; i++) {
    idx   = a->j + a->i[i];
    v     = a->a + a->i[i];
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

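// The parallel kernels overlap communication with computation: the scatter of off-process entries
// of xx into the work vector b->w is started, the local (diagonal-block) product is applied, and
// only then is the scatter completed and the off-diagonal block applied.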
static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
  PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
  C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  PetscBool    flg     = PETSC_FALSE;
  Mat          A = product->A, P = product->B;
  PetscInt     alg = 1; /* set default algorithm */
#if !defined(PETSC_HAVE_HYPRE)
  const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
  PetscInt    nalg        = 4;
#else
  const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
  PetscInt    nalg        = 5;
#endif

  PetscFunctionBegin;
  PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

  /* PtAP */
  /* Check matrix local sizes */
  PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
  PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

  /* Set the default algorithm */
  PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
  PetscOptionsEnd();

  PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
  PetscCall(PetscInfo(A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
  PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
  PetscCall(MatProductSetFromOptions(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

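// Numeric phase of C = P^T * A * P for SeqMAIJ P: rows of A*P are accumulated in a dense work
// array one row at a time, exploiting that each block row of P repeats the same compressed AIJ
// row for all 'dof' components, so only the underlying AIJ structure is traversed.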
static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
{
  /* This routine requires testing -- first draft only */
  Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
  Mat              P  = pp->AIJ;
  Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
  const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
  const PetscInt  *ci = c->i, *cj = c->j, *cjj;
  const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
  PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
  const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
  MatScalar       *ca = c->a, *caj, *apa;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

  /* Clear old values in C */
  PetscCall(PetscArrayzero(ca, ci[cm]));

  for (i = 0; i < am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i + 1] - ai[i];
    apnzj = 0;
    for (j = 0; j < anzi; j++) {
      /* Get offset within block of P */
      pshift = *aj % ppdof;
      /* Get block row of P */
      prow = *aj++ / ppdof; /* integer division */
      pnzj = pi[prow + 1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k = 0; k < pnzj; k++) {
        poffset = pjj[k] * ppdof + pshift;
        if (!apjdense[poffset]) {
          apjdense[poffset] = -1;
          apj[apnzj++]      = poffset;
        }
        apa[poffset] += (*aa) * paj[k];
      }
      PetscCall(PetscLogFlops(2.0 * pnzj));
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    /* Note: a array does not need sorting as it is in dense storage locations. */
    PetscCall(PetscSortInt(apnzj, apj));

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    prow    = i / ppdof; /* integer division */
    pshift  = i % ppdof;
    poffset = pi[prow];
    pnzi    = pi[prow + 1] - poffset;
    /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
    pJ = pj + poffset;
    pA = pa + poffset;
    for (j = 0; j < pnzi; j++) {
      crow = (*pJ) * ppdof + pshift;
      cjj  = cj + ci[crow];
      caj  = ca + ci[crow];
      pJ++;
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k = 0, nextap = 0; nextap < apnzj; k++) {
        if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
      }
      PetscCall(PetscLogFlops(2.0 * apnzj));
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j = 0; j < apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree3(apa, apj, apjdense));
  PetscFunctionReturn(PETSC_SUCCESS);
}

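// Symbolic phase of C = P^T * A * P for SeqMAIJ P: the nonzero pattern of C is computed row by row
// with dense work arrays, expanding each compressed row of the underlying AIJ matrix by the block
// size and component shift.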
static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
{
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
  Mat                P  = pp->AIJ;
  Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
  PetscInt          *pti, *ptj, *ptJ;
  PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
  const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
  PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
  MatScalar         *ca;
  const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

  cn = pn * ppdof;
  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(cn + 1, &ci));
  ci[0] = 0;

  /* Work arrays for rows of P^T*A */
  PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
  PetscCall(PetscArrayzero(ptadenserow, an));
  PetscCall(PetscArrayzero(denserow, cn));

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i = 0; i < pn; i++) {
    ptnzi = pti[i + 1] - pti[i];
    ptJ   = ptj + pti[i];
    for (dof = 0; dof < ppdof; dof++) {
      ptanzi = 0;
      /* Determine symbolic row of PtA: */
      for (j = 0; j < ptnzi; j++) {
        /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
        arow = ptJ[j] * ppdof + dof;
        /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
        anzj = ai[arow + 1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k = 0; k < anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j = 0; j < ptanzi; j++) {
        /* Get offset within block of P */
        pshift = *ptaj % ppdof;
        /* Get block row of P */
        prow = (*ptaj++) / ppdof; /* integer division */
        /* P has same number of nonzeros per row as the compressed form */
        pnzj = pi[prow + 1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k = 0; k < pnzj; k++) {
          /* Locations in C are shifted by the offset within the block */
          /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
          if (!denserow[pjj[k] * ppdof + pshift]) {
            denserow[pjj[k] * ppdof + pshift] = -1;
            sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
          }
        }
      }

      /* sort sparserow */
      PetscCall(PetscSortInt(cnzi, sparserow));

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));

      /* Copy data into free space, and zero out denserows */
      PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));

      current_space->array += cnzi;
      current_space->local_used += cnzi;
      current_space->local_remaining -= cnzi;

      for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
      for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  PetscCall(PetscMalloc1(ci[cn], &cj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
  PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));

  /* Allocate space for ca */
  PetscCall(PetscCalloc1(ci[cn], &ca));

  /* put together the new matrix */
  PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
  PetscCall(MatSetBlockSize(C, pp->dof));

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ *)C->data;
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;

  C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
  C->ops->productnumeric = MatProductNumeric_PtAP;

  /* Clean up. */
  PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);

PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);

PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);

PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);

PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
  C->ops->productnumeric = MatProductNumeric_PtAP;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
  Mat          a   = b->AIJ, B;
  Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
  PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
  PetscInt    *cols;
  PetscScalar *vals;

  PetscFunctionBegin;
  PetscCall(MatGetSize(a, &m, &n));
  PetscCall(PetscMalloc1(dof * m, &ilen));
  for (i = 0; i < m; i++) {
    nmax = PetscMax(nmax, aij->ilen[i]);
    for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
  }
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
  PetscCall(PetscFree(ilen));
  PetscCall(PetscMalloc1(nmax, &icols));
  ii = 0;
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
    for (j = 0; j < dof; j++) {
      for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
      PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
  }
  PetscCall(PetscFree(icols));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
  Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
  Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
  Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
  Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
  Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
  PetscInt     dof     = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
  PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
  PetscInt     rstart, cstart, *garray, ii, k;
  PetscScalar *vals, *ovals;

  PetscFunctionBegin;
  PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
  for (i = 0; i < A->rmap->n / dof; i++) {
    nmax  = PetscMax(nmax, AIJ->ilen[i]);
    onmax = PetscMax(onmax, OAIJ->ilen[i]);
    for (j = 0; j < dof; j++) {
      dnz[dof * i + j] = AIJ->ilen[i];
      onz[dof * i + j] = OAIJ->ilen[i];
    }
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
  PetscCall(MatSetBlockSize(B, dof));
  PetscCall(PetscFree2(dnz, onz));

  PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
  rstart = dof * maij->A->rmap->rstart;
  cstart = dof * maij->A->cmap->rstart;
  garray = mpiaij->garray;

  ii = rstart;
  for (i = 0; i < A->rmap->n / dof; i++) {
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
    for (j = 0; j < dof; j++) {
      for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
      for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
  }
  PetscCall(PetscFree2(icols, oicols));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
    ((PetscObject)A)->refct = 1;

    PetscCall(MatHeaderReplace(A, &B));

    ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
  operations for multicomponent problems. It interpolates each component the same
  way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices,
  and `MATMPIAIJ` for distributed matrices.

  Collective

  Input Parameters:
+ A - the `MATAIJ` matrix describing the action on blocks
- dof - the block size (number of components per node)

  Output Parameter:
. maij - the new `MATMAIJ` matrix

  Level: advanced
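
  Example Usage:
  A minimal sketch, assuming `interp` is an existing `MATAIJ` matrix:
.vb
  Mat maij;

  PetscCall(MatCreateMAIJ(interp, 4, &maij)); // acts on 4 components per node
  // use maij with MatMult(), MatMultTranspose(), etc.
  PetscCall(MatDestroy(&maij));
.ve

  Note:
  If `dof` is 1, `A` itself is returned as `maij` (with its reference count increased) rather than a new matrix.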

.seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
@*/
PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
{
  PetscInt  n;
  Mat       B;
  PetscBool flg;
#if defined(PETSC_HAVE_CUDA)
  /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
  PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
#endif

  PetscFunctionBegin;
  dof = PetscAbs(dof);
  PetscCall(PetscObjectReference((PetscObject)A));

  if (dof == 1) *maij = A;
  else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    /* propagate vec type */
    PetscCall(MatSetVecType(B, A->defaultvectype));
    PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
    PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));

    B->assembled = PETSC_TRUE;

    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
    if (flg) {
      Mat_SeqMAIJ *b;

      PetscCall(MatSetType(B, MATSEQMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_SeqMAIJ;
      B->ops->view    = MatView_SeqMAIJ;

      b      = (Mat_SeqMAIJ *)B->data;
      b->dof = dof;
      b->AIJ = A;

      if (dof == 2) {
        B->ops->mult             = MatMult_SeqMAIJ_2;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
      } else if (dof == 3) {
        B->ops->mult             = MatMult_SeqMAIJ_3;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
      } else if (dof == 4) {
        B->ops->mult             = MatMult_SeqMAIJ_4;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
      } else if (dof == 5) {
        B->ops->mult             = MatMult_SeqMAIJ_5;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
      } else if (dof == 6) {
        B->ops->mult             = MatMult_SeqMAIJ_6;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
      } else if (dof == 7) {
        B->ops->mult             = MatMult_SeqMAIJ_7;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
      } else if (dof == 8) {
        B->ops->mult             = MatMult_SeqMAIJ_8;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
      } else if (dof == 9) {
        B->ops->mult             = MatMult_SeqMAIJ_9;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
      } else if (dof == 10) {
        B->ops->mult             = MatMult_SeqMAIJ_10;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
      } else if (dof == 11) {
        B->ops->mult             = MatMult_SeqMAIJ_11;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
      } else if (dof == 16) {
        B->ops->mult             = MatMult_SeqMAIJ_16;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
      } else if (dof == 18) {
        B->ops->mult             = MatMult_SeqMAIJ_18;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
      } else {
        B->ops->mult             = MatMult_SeqMAIJ_N;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
      }
#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
    } else {
      Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
      Mat_MPIMAIJ *b;
      IS           from, to;
      Vec          gvec;

      PetscCall(MatSetType(B, MATMPIMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_MPIMAIJ;
      B->ops->view    = MatView_MPIMAIJ;

      b      = (Mat_MPIMAIJ *)B->data;
      b->dof = dof;
      b->A   = A;

      PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
      PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));

      PetscCall(VecGetSize(mpiaij->lvec, &n));
      PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
      PetscCall(VecSetSizes(b->w, n * dof, n * dof));
      PetscCall(VecSetBlockSize(b->w, dof));
      PetscCall(VecSetType(b->w, VECSEQ));

      /* create two temporary index sets used to build the scatter */
      PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));

      /* create temporary global vector to generate scatter context */
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));

      /* generate the scatter context */
      PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));

      PetscCall(ISDestroy(&from));
      PetscCall(ISDestroy(&to));
      PetscCall(VecDestroy(&gvec));

      B->ops->mult             = MatMult_MPIMAIJ_dof;
      B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
      B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
      B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
    }
    B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
    B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
    PetscCall(MatSetUp(B));
#if defined(PETSC_HAVE_CUDA)
    /* temporary until we have CUDA implementation of MAIJ */
    {
      PetscBool flg;
      if (convert) {
        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
        if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
      }
    }
#endif
    *maij = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}