xref: /petsc/src/mat/impls/kaij/kaij.c (revision 5d2a865bca654049dd32bfe43807bdf4c92c46f4) !
1 /*
2   Defines the basic matrix operations for the KAIJ  matrix storage format.
3   This format is used to evaluate matrices of the form:
4 
5     [I \otimes S + A \otimes T]
6 
7   where
8     S is a dense (p \times q) matrix
9     T is a dense (p \times q) matrix
10     A is an AIJ  (n \times n) matrix
11     I is the identity matrix
12 
13   The resulting matrix is (np \times nq)
14 
15   We provide:
16      MatMult()
17      MatMultAdd()
18      MatInvertBlockDiagonal()
19   and
20      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
21 
22   This single directory handles both the sequential and parallel codes
23 */
24 
25 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
26 #include <../src/mat/utils/freespace.h>
27 #include <petsc/private/vecimpl.h>
28 
29 /*@
30   MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
31 
32   Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
33 
34   Input Parameter:
35 . A - the `MATKAIJ` matrix
36 
37   Output Parameter:
38 . B - the `MATAIJ` matrix
39 
40   Level: advanced
41 
42   Note:
43   The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
44 
45 .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
46 @*/
MatKAIJGetAIJ(Mat A,Mat * B)47 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
48 {
49   PetscBool ismpikaij, isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 /*@C
67   MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix
68 
69   Not Collective; the entire `S` is stored and returned independently on all processes.
70 
71   Input Parameter:
72 . A - the `MATKAIJ` matrix
73 
74   Output Parameters:
75 + m - the number of rows in `S`
76 . n - the number of columns in `S`
77 - S - the S matrix, in form of a scalar array in column-major format
78 
79   Level: advanced
80 
81   Note:
82   All output parameters are optional (pass `NULL` if not desired)
83 
84 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85 @*/
MatKAIJGetS(Mat A,PetscInt * m,PetscInt * n,PetscScalar * S[])86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar *S[])
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
89 
90   PetscFunctionBegin;
91   if (m) *m = b->p;
92   if (n) *n = b->q;
93   if (S) *S = b->S;
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 /*@C
98   MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix
99 
100   Not Collective; the entire `S` is stored and returned independently on all processes.
101 
102   Input Parameter:
103 . A - the `MATKAIJ` matrix
104 
105   Output Parameters:
106 + m - the number of rows in `S`
107 . n - the number of columns in `S`
108 - S - the S matrix, in form of a scalar array in column-major format
109 
110   Level: advanced
111 
112   Note:
113   All output parameters are optional (pass `NULL` if not desired)
114 
115 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116 @*/
MatKAIJGetSRead(Mat A,PetscInt * m,PetscInt * n,const PetscScalar * S[])117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *S[])
118 {
119   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
120 
121   PetscFunctionBegin;
122   if (m) *m = b->p;
123   if (n) *n = b->q;
124   if (S) *S = b->S;
125   PetscFunctionReturn(PETSC_SUCCESS);
126 }
127 
128 /*@C
129   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
130 
131   Not Collective
132 
133   Input Parameters:
134 + A - the `MATKAIJ` matrix
135 - S - location of pointer to array obtained with `MatKAIJGetS()`
136 
137   Level: advanced
138 
139   Note:
140   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
141   If `NULL` is passed, it will not attempt to zero the array pointer.
142 
143 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
144 @*/
MatKAIJRestoreS(Mat A,PetscScalar * S[])145 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar *S[])
146 {
147   PetscFunctionBegin;
148   if (S) *S = NULL;
149   PetscCall(PetscObjectStateIncrease((PetscObject)A));
150   PetscFunctionReturn(PETSC_SUCCESS);
151 }
152 
153 /*@C
154   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
155 
156   Not Collective
157 
158   Input Parameters:
159 + A - the `MATKAIJ` matrix
160 - S - location of pointer to array obtained with `MatKAIJGetSRead()`
161 
162   Level: advanced
163 
164   Note:
165   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
166   If `NULL` is passed, it will not attempt to zero the array pointer.
167 
168 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
169 @*/
MatKAIJRestoreSRead(Mat A,const PetscScalar * S[])170 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar *S[])
171 {
172   PetscFunctionBegin;
173   if (S) *S = NULL;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 /*@C
178   MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix
179 
180   Not Collective; the entire `T` is stored and returned independently on all processes
181 
182   Input Parameter:
183 . A - the `MATKAIJ` matrix
184 
185   Output Parameters:
186 + m - the number of rows in `T`
187 . n - the number of columns in `T`
188 - T - the T matrix, in form of a scalar array in column-major format
189 
190   Level: advanced
191 
192   Note:
193   All output parameters are optional (pass `NULL` if not desired)
194 
195 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
196 @*/
MatKAIJGetT(Mat A,PetscInt * m,PetscInt * n,PetscScalar * T[])197 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar *T[])
198 {
199   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
200 
201   PetscFunctionBegin;
202   if (m) *m = b->p;
203   if (n) *n = b->q;
204   if (T) *T = b->T;
205   PetscFunctionReturn(PETSC_SUCCESS);
206 }
207 
208 /*@C
209   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix
210 
211   Not Collective; the entire `T` is stored and returned independently on all processes
212 
213   Input Parameter:
214 . A - the `MATKAIJ` matrix
215 
216   Output Parameters:
217 + m - the number of rows in `T`
218 . n - the number of columns in `T`
219 - T - the T matrix, in form of a scalar array in column-major format
220 
221   Level: advanced
222 
223   Note:
224   All output parameters are optional (pass `NULL` if not desired)
225 
226 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
227 @*/
MatKAIJGetTRead(Mat A,PetscInt * m,PetscInt * n,const PetscScalar * T[])228 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *T[])
229 {
230   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
231 
232   PetscFunctionBegin;
233   if (m) *m = b->p;
234   if (n) *n = b->q;
235   if (T) *T = b->T;
236   PetscFunctionReturn(PETSC_SUCCESS);
237 }
238 
239 /*@C
240   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
241 
242   Not Collective
243 
244   Input Parameters:
245 + A - the `MATKAIJ` matrix
246 - T - location of pointer to array obtained with `MatKAIJGetT()`
247 
248   Level: advanced
249 
250   Note:
251   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
252   If `NULL` is passed, it will not attempt to zero the array pointer.
253 
254 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
255 @*/
MatKAIJRestoreT(Mat A,PetscScalar * T[])256 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar *T[])
257 {
258   PetscFunctionBegin;
259   if (T) *T = NULL;
260   PetscCall(PetscObjectStateIncrease((PetscObject)A));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 /*@C
265   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
266 
267   Not Collective
268 
269   Input Parameters:
270 + A - the `MATKAIJ` matrix
271 - T - location of pointer to array obtained with `MatKAIJGetTRead()`
272 
273   Level: advanced
274 
275   Note:
276   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
277   If `NULL` is passed, it will not attempt to zero the array pointer.
278 
279 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
280 @*/
MatKAIJRestoreTRead(Mat A,const PetscScalar * T[])281 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar *T[])
282 {
283   PetscFunctionBegin;
284   if (T) *T = NULL;
285   PetscFunctionReturn(PETSC_SUCCESS);
286 }
287 
288 /*@
289   MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
290 
291   Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
292 
293   Input Parameters:
294 + A - the `MATKAIJ` matrix
295 - B - the `MATAIJ` matrix
296 
297   Level: advanced
298 
299   Notes:
300   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
301 
302   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
303 
304 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
305 @*/
MatKAIJSetAIJ(Mat A,Mat B)306 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
307 {
308   PetscMPIInt size;
309   PetscBool   flg;
310 
311   PetscFunctionBegin;
312   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
313   if (size == 1) {
314     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
315     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
316     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
317     a->AIJ         = B;
318   } else {
319     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
320     a->A           = B;
321   }
322   PetscCall(PetscObjectReference((PetscObject)B));
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 /*@
327   MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix
328 
329   Logically Collective; the entire `S` is stored independently on all processes.
330 
331   Input Parameters:
332 + A - the `MATKAIJ` matrix
333 . p - the number of rows in `S`
334 . q - the number of columns in `S`
335 - S - the S matrix, in form of a scalar array in column-major format
336 
337   Level: advanced
338 
339   Notes:
340   The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.
341 
342   The `S` matrix is copied, so the user can destroy this array.
343 
344 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
345 @*/
MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])346 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
347 {
348   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
349 
350   PetscFunctionBegin;
351   PetscCall(PetscFree(a->S));
352   if (S) {
353     PetscCall(PetscMalloc1(p * q, &a->S));
354     PetscCall(PetscArraycpy(a->S, S, p * q));
355   } else a->S = NULL;
356 
357   a->p = p;
358   a->q = q;
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 /*@
363   MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.
364 
365   Logically Collective.
366 
367   Input Parameter:
368 . A - the `MATKAIJ` matrix
369 
370   Output Parameter:
371 . identity - the Boolean value
372 
373   Level: advanced
374 
375 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
376 @*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  PetscInt     i, j;

  PetscFunctionBegin;
  /* Non-square blocks can never be scaled identities. */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  } else *identity = PETSC_TRUE;
  /* If T is the identity (isTI) and no S is stored, both are trivially
     scaled identities and the entrywise scan below can be skipped.
     The scan short-circuits via *identity in the loop conditions. */
  if (!a->isTI || a->S) {
    for (i = 0; i < a->p && *identity; i++) {
      for (j = 0; j < a->p && *identity; j++) {
        if (i != j) {
          /* Off-diagonal entries must vanish (to within PETSC_SMALL). */
          if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* Diagonal entries must all match the first one (a common scale). */
          if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
402 
403 /*@
404   MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix
405 
406   Logically Collective; the entire `T` is stored independently on all processes.
407 
408   Input Parameters:
409 + A - the `MATKAIJ` matrix
410 . p - the number of rows in `S`
411 . q - the number of columns in `S`
412 - T - the `T` matrix, in form of a scalar array in column-major format
413 
414   Level: advanced
415 
416   Notes:
417   The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.
418 
419   The `T` matrix is copied, so the user can destroy this array.
420 
421 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
422 @*/
MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])423 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
424 {
425   PetscInt     i, j;
426   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
427   PetscBool    isTI = PETSC_FALSE;
428 
429   PetscFunctionBegin;
430   /* check if T is an identity matrix */
431   if (T && (p == q)) {
432     isTI = PETSC_TRUE;
433     for (i = 0; i < p; i++) {
434       for (j = 0; j < q; j++) {
435         if (i == j) {
436           /* diagonal term must be 1 */
437           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
438         } else {
439           /* off-diagonal term must be 0 */
440           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
441         }
442       }
443     }
444   }
445   a->isTI = isTI;
446 
447   PetscCall(PetscFree(a->T));
448   if (T && (!isTI)) {
449     PetscCall(PetscMalloc1(p * q, &a->T));
450     PetscCall(PetscArraycpy(a->T, T, p * q));
451   } else a->T = NULL;
452 
453   a->p = p;
454   a->q = q;
455   PetscFunctionReturn(PETSC_SUCCESS);
456 }
457 
MatDestroy_SeqKAIJ(Mat A)458 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
459 {
460   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
461 
462   PetscFunctionBegin;
463   PetscCall(MatDestroy(&b->AIJ));
464   PetscCall(PetscFree(b->S));
465   PetscCall(PetscFree(b->T));
466   PetscCall(PetscFree(b->ibdiag));
467   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
468   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
469   PetscCall(PetscFree(A->data));
470   PetscFunctionReturn(PETSC_SUCCESS);
471 }
472 
/* Rebuild the sequential KAIJ submatrices (diagonal AIJ and off-diagonal OAIJ)
   of a parallel KAIJ matrix, but only when the underlying parallel AIJ matrix
   has changed since the last build, as detected by its PetscObjectState. */
static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
{
  Mat_MPIKAIJ     *a;
  Mat_MPIAIJ      *mpiaij;
  PetscScalar     *T;
  PetscInt         i, j;
  PetscObjectState state;

  PetscFunctionBegin;
  a      = (Mat_MPIKAIJ *)A->data;
  mpiaij = (Mat_MPIAIJ *)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p * a->q, &T));
      for (i = 0; i < a->p; i++) {
        for (j = 0; j < a->q; j++) {
          if (i == j) T[i + j * a->p] = 1.0;
          else T[i + j * a->p] = 0.0;
        }
      }
    } else T = a->T;
    /* The off-process part (OAIJ) carries no shift: S is applied only to the
       on-process diagonal block. */
    PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T)); /* free the temporary identity only */
    a->state = state;                     /* remember which version we built from */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
512 
/* Set up a KAIJ matrix: derive its (np x nq) sizes and block sizes from the
   inner AIJ matrix, and for the parallel case additionally build the
   sequential submatrices and the scatter used to gather off-process entries. */
static PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt     n;
  PetscMPIInt  size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
  /* NOTE(review): in the parallel branch below, p and q are still read through
     this Mat_SeqKAIJ cast — this relies on Mat_SeqKAIJ and Mat_MPIKAIJ sharing
     a common leading layout; confirm against kaij.h. */

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Sequential: sizes are the AIJ sizes scaled by the dense block dims. */
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS           from, to;
    Vec          gvec;

    a      = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    /* Build (or refresh) the sequential diagonal/off-diagonal submatrices. */
    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Local work vector holding q entries per off-process column of the AIJ. */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
567 
/* View a KAIJ matrix: for INFO/IMPL ASCII formats print S, T, and the inner
   AIJ matrix directly; for all other formats convert to AIJ and view that. */
static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      /* IMPL format dumps the raw entries (column-major order). */
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      /* Identity T is stored as a flag, not an array. */
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatView(B, viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
631 
MatDestroy_MPIKAIJ(Mat A)632 static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
633 {
634   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
635 
636   PetscFunctionBegin;
637   PetscCall(MatDestroy(&b->AIJ));
638   PetscCall(MatDestroy(&b->OAIJ));
639   PetscCall(MatDestroy(&b->A));
640   PetscCall(VecScatterDestroy(&b->ctx));
641   PetscCall(VecDestroy(&b->w));
642   PetscCall(PetscFree(b->S));
643   PetscCall(PetscFree(b->T));
644   PetscCall(PetscFree(b->ibdiag));
645   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
646   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
647   PetscCall(PetscFree(A->data));
648   PetscFunctionReturn(PETSC_SUCCESS);
649 }
650 
651 /* zz = yy + Axx */
/* zz = yy + Axx, where A = I \otimes S + AIJ \otimes T.
   For each block row i, accumulates sum_j a_ij * (T x_j) plus S x_i.
   When yy is NULL this computes zz = Axx (used by MatMult_SeqKAIJ). */
static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x, *v, *bx;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

  PetscFunctionBegin;
  /* Seed the result with yy (or zero), then accumulate A*xx into it. */
  if (!yy) {
    PetscCall(VecSet(zz, 0.0));
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  /* With no S, no T, and T not the identity, the operator is zero: done. */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j; /* CSR column indices of the inner AIJ matrix */
  v   = a->a; /* CSR values */
  ii  = a->i; /* CSR row offsets */

  if (b->isTI) {
    /* T == identity: each AIJ entry just scales the corresponding x block. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
      }
    }
    PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
  } else if (t) {
    /* General T: apply the dense p x q block T to each x block. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) {
          for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
  }
  if (s) {
    /* Add the shift term S x_i to each block row. */
    for (i = 0; i < m; i++) {
      sums = y + p * i;
      bx   = x + q * i;
      /* Guard against rectangular AIJ: only rows with a matching column block
         receive the shift (for square AIJ this is always true). */
      if (i < b->AIJ->cmap->n) {
        for (j = 0; j < q; j++) {
          for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
        }
      }
    }
    PetscCall(PetscLogFlops(2.0 * m * p * q));
  }

  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
720 
MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)721 static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
722 {
723   PetscFunctionBegin;
724   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
725   PetscFunctionReturn(PETSC_SUCCESS);
726 }
727 
728 #include <petsc/private/kernels/blockinvert.h>
729 
/* Compute (and cache in b->ibdiag) the inverses of the p x p diagonal blocks
   S + a_ii * T of the KAIJ matrix; optionally return the cached array. */
static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt           i, j, *v_pivots, dof, dof2;
  PetscScalar       *diag, aval, *v_work;

  PetscFunctionBegin;
  PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof * dof; /* entries per dense diagonal block */

  /* Return the cached inverse if it is still valid. */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* Start the block from S (or zero when there is no shift). */
    if (S) {
      PetscCall(PetscArraycpy(diag, S, dof2));
    } else {
      PetscCall(PetscArrayzero(diag, dof2));
    }
    if (b->isTI) {
      /* T == identity: add a_ii to the diagonal of the block. */
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      /* General T: add a_ii * T to the block. */
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    /* Invert the dense block in place (no shift applied, no allow-zero-pivot). */
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
782 
MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat * B)783 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
784 {
785   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
786 
787   PetscFunctionBegin;
788   *B = kaij->AIJ;
789   PetscFunctionReturn(PETSC_SUCCESS);
790 }
791 
/* Convert a KAIJ matrix (sequential or parallel) to a plain AIJ matrix.
   For a fresh conversion this first computes exact per-row preallocation from
   the inner AIJ sparsity (each AIJ nonzero expands to a q-wide dense block,
   plus an extra q entries for rows whose AIJ diagonal is missing but S is
   present); the actual value transfer is delegated to MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqKAIJ    *a = (Mat_SeqKAIJ *)A->data;
  Mat             AIJ, OAIJ, B;
  PetscInt       *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt  p = a->p, q = a->q;
  PetscBool       ismpikaij, diagDense;
  const PetscInt *aijdiag;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
    if (ismpikaij) {
      /* Parallel case: unwrap the inner sequential AIJ matrices of the
         diagonal (AIJ) and off-diagonal (OAIJ) KAIJ submatrices. */
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
      OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ  = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));

    /*
      Determine the first row of AIJ with missing diagonal and assume all successive rows also have a missing diagonal
    */
    PetscCall(MatGetDiagonalMarkers_SeqAIJ(AIJ, &aijdiag, &diagDense));
    if (diagDense || !a->S) d = m; /* no extra space needed: diagonals present, or no S to add */
    else {
      Mat_SeqAIJ *aij = (Mat_SeqAIJ *)AIJ->data;

      /* aijdiag[d] == aij->i[d+1] marks a row with no stored diagonal entry
         (assumption from the marker convention — TODO confirm against
         MatGetDiagonalMarkers_SeqAIJ). */
      for (d = 0; d < m; d++) {
        if (aijdiag[d] == aij->i[d + 1]) break;
      }
    }
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      /* Each of the p expanded rows gets q entries per AIJ nonzero, plus q
         more when S must create an otherwise-missing diagonal block. */
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      /* Off-diagonal part carries no S, so no extra diagonal allowance. */
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  /* Transfer the values generically, then dispose per the reuse mode. */
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &B));
  else *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
854 
/*
  MatSOR_SeqKAIJ - (S)SOR/Gauss-Seidel relaxation for the sequential KAIJ format.

  Relaxation is performed blockwise with dense block size bs = p (which must equal q):
  for each block row i of the underlying AIJ matrix the residual is accumulated into
  scratch storage and the update is obtained by applying the precomputed inverse of the
  block diagonal (kaij->ibdiag).

  Supported: forward/backward/symmetric sweeps, with or without a zero initial guess.
  Unsupported (rejected by the PetscChecks below): the Eisenstat variant, a nonzero
  diagonal shift (fshift), SOR_APPLY_UPPER/SOR_APPLY_LOWER, and non-square blocks.
*/
static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
  const PetscScalar *aa = a->a, *T = kaij->T, *v;
  const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
  const PetscScalar *b, *xb, *idiag;
  PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
  PetscInt           i, j, k, i2, bs, bs2, nz;

  PetscFunctionBegin;
  its = its * lits; /* total number of local relaxation sweeps */
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
  PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
  PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
  bs  = p; /* dense block size used throughout */
  bs2 = bs * bs;

  if (!m) PetscFunctionReturn(PETSC_SUCCESS); /* no local block rows: nothing to do */

  /* Lazily (re)build the inverted block diagonal used to apply D^{-1} below */
  if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
  idiag = kaij->ibdiag;
  PetscCall(MatGetDiagonalMarkers_SeqAIJ(kaij->AIJ, &diag, NULL));

  /* One-time allocation of SOR workspace, cached on the matrix:
     w, y are single-block scratch; work, t are m-block scratch;
     arr holds up to one scaled copy of T per nonzero of an AIJ row */
  if (!kaij->sor.setup) {
    PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
    kaij->sor.setup = PETSC_TRUE;
  }
  y    = kaij->sor.y;
  w    = kaij->sor.w;
  work = kaij->sor.work;
  t    = kaij->sor.t;
  arr  = kaij->sor.arr;

  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));

  if (flag & SOR_ZERO_INITIAL_GUESS) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
      PetscCall(PetscArraycpy(t, b, bs));
      i2 = bs;
      idiag += bs2;
      for (i = 1; i < m; i++) {
        v  = aa + ai[i];
        vi = aj + ai[i];
        nz = diag[i] - ai[i]; /* number of strictly-lower-triangular entries of AIJ row i */

        if (T) { /* b - T (Arow * x) */
          PetscCall(PetscArrayzero(w, bs));
          for (j = 0; j < nz; j++) {
            for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
          for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
        } else if (kaij->isTI) { /* T is the identity: skip the dense multiply by T */
          PetscCall(PetscArraycpy(t + i2, b + i2, bs));
          for (j = 0; j < nz; j++) {
            for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
          }
        } else { /* T == NULL: the A \otimes T term contributes nothing */
          PetscCall(PetscArraycpy(t + i2, b + i2, bs));
        }

        PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); /* y <- D_i^{-1} t_i */
        for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];

        idiag += bs2;
        i2 += bs;
      }
      /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
      PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
      xb = t;
    } else xb = b;
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      idiag = kaij->ibdiag + bs2 * (m - 1);
      i2    = bs * (m - 1);
      PetscCall(PetscArraycpy(w, xb + i2, bs));
      PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); /* last block row: x <- D^{-1} xb */
      i2 -= bs;
      idiag -= bs2;
      for (i = m - 2; i >= 0; i--) {
        v  = aa + diag[i] + 1;
        vi = aj + diag[i] + 1;
        nz = ai[i + 1] - diag[i] - 1; /* number of strictly-upper-triangular entries of AIJ row i */

        if (T) { /* FIXME: This branch untested */
          PetscCall(PetscArraycpy(w, xb + i2, bs));
          /* copy all rows of x that are needed into contiguous space */
          workt = work;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
            workt += bs;
          }
          arrt = arr;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArraycpy(arrt, T, bs2));
            for (k = 0; k < bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        } else if (kaij->isTI) {
          /* NOTE(review): reads t + i2 rather than xb + i2; when only a backward sweep was
             requested (xb == b) the forward sweep has not filled t -- confirm this is intended */
          PetscCall(PetscArraycpy(w, t + i2, bs));
          for (j = 0; j < nz; j++) {
            for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
          }
        }
        /* NOTE(review): when T == NULL and !isTI, w is not refreshed for this block row before
           the solve below -- confirm the S-only case cannot reach this loop */

        PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
        for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];

        idiag -= bs2;
        i2 -= bs;
      }
      PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
    }
    its--;
  }
  while (its--) { /* FIXME: This branch not updated */
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      i2    = 0;
      idiag = kaij->ibdiag;
      for (i = 0; i < m; i++) {
        PetscCall(PetscArraycpy(w, b + i2, bs));

        /* subtract the strictly-lower-triangular contribution of AIJ row i */
        v     = aa + ai[i];
        vi    = aj + ai[i];
        nz    = diag[i] - ai[i];
        workt = work;
        for (j = 0; j < nz; j++) {
          PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
          workt += bs;
        }
        arrt = arr;
        if (T) {
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArraycpy(arrt, T, bs2));
            for (k = 0; k < bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        } else if (kaij->isTI) {
          /* expand each scalar a_ij into a_ij * I so the same dense kernel can be used */
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArrayzero(arrt, bs2));
            for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        }
        PetscCall(PetscArraycpy(t + i2, w, bs));

        /* subtract the strictly-upper-triangular contribution of AIJ row i */
        v     = aa + diag[i] + 1;
        vi    = aj + diag[i] + 1;
        nz    = ai[i + 1] - diag[i] - 1;
        workt = work;
        for (j = 0; j < nz; j++) {
          PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
          workt += bs;
        }
        arrt = arr;
        if (T) {
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArraycpy(arrt, T, bs2));
            for (k = 0; k < bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        } else if (kaij->isTI) {
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArrayzero(arrt, bs2));
            for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        }

        PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
        for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);

        idiag += bs2;
        i2 += bs;
      }
      xb = t;
    } else xb = b;
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      idiag = kaij->ibdiag + bs2 * (m - 1);
      i2    = bs * (m - 1);
      /* NOTE(review): unlike the zero-initial-guess backward sweep above, neither loop below
         decrements i2 or idiag, so every iteration operates on the last block row; this is
         inside the branch marked "FIXME: This branch not updated" -- verify before relying on
         multi-iteration backward sweeps */
      if (xb == b) {
        for (i = m - 1; i >= 0; i--) {
          PetscCall(PetscArraycpy(w, b + i2, bs));

          v     = aa + ai[i];
          vi    = aj + ai[i];
          nz    = diag[i] - ai[i];
          workt = work;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscArraycpy(arrt, T, bs2));
              for (k = 0; k < bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          } else if (kaij->isTI) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscArrayzero(arrt, bs2));
              for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          }

          v     = aa + diag[i] + 1;
          vi    = aj + diag[i] + 1;
          nz    = ai[i + 1] - diag[i] - 1;
          workt = work;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscArraycpy(arrt, T, bs2));
              for (k = 0; k < bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          } else if (kaij->isTI) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscArrayzero(arrt, bs2));
              for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          }

          PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
          for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
        }
      } else {
        /* xb == t: the forward sweep already removed the lower-triangular part,
           so only the strictly-upper-triangular contribution remains */
        for (i = m - 1; i >= 0; i--) {
          PetscCall(PetscArraycpy(w, xb + i2, bs));
          v     = aa + diag[i] + 1;
          vi    = aj + diag[i] + 1;
          nz    = ai[i + 1] - diag[i] - 1;
          workt = work;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscArraycpy(arrt, T, bs2));
              for (k = 0; k < bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          } else if (kaij->isTI) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscArrayzero(arrt, bs2));
              for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          }
          PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
          for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
        }
      }
      PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
    }
  }

  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1140 
1141 /*===================================================================================*/
1142 
MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)1143 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1144 {
1145   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1146 
1147   PetscFunctionBegin;
1148   if (!yy) {
1149     PetscCall(VecSet(zz, 0.0));
1150   } else {
1151     PetscCall(VecCopy(yy, zz));
1152   }
1153   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1154   /* start the scatter */
1155   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1156   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1157   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1158   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1159   PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161 
/* MatMult for MATMPIKAIJ: delegates to MatMultAdd_MPIKAIJ with a NULL addend,
   which zeroes yy before accumulating the product. */
static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1168 
/* Invert the block diagonal of a MATMPIKAIJ matrix by delegating to the local
   (sequential) diagonal part b->A after making sure it is current. */
static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
  PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1178 
/*
  MatGetRow_SeqKAIJ - Return one row of the expanded (np x nq) KAIJ matrix.

  Row `row` corresponds to AIJ row r = row/p and intra-block row s = row%p.
  Each AIJ nonzero a_{r,c} contributes q entries a_{r,c} * T[s, 0..q) at columns
  c*q .. c*q+q-1 (or the s-th unit row when T is the identity), and S adds its
  s-th row into the diagonal block column (slot c below).

  The cols/values arrays are allocated here with PetscMalloc2 and must be freed
  via MatRestoreRow_SeqKAIJ().
*/
static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
{
  Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
  PetscBool    diag = PETSC_FALSE;
  PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
  PetscScalar *vaij, *v, *S = b->S, *T = b->T;

  PetscFunctionBegin;
  PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);

  /* no S, no T, and T is not the identity: the matrix is identically zero */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (T || b->isTI) {
    /* NOTE(review): the AIJ row is never given back with MatRestoreRow_SeqAIJ -- confirm
       this is safe for the SeqAIJ implementation used here */
    PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
    c = nzaij; /* default slot for the S contribution: append after the T entries */
    for (i = 0; i < nzaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c    = i; /* S merges into the existing diagonal block instead */
      }
    }
  } else nzaij = c = 0;

  /* calculate size of row */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz, &idx, nz, &v));
    for (i = 0; i < q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* T is the identity: only the s-th entry of each q-wide group is nonzero */
      for (i = 0; i < nzaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = colsaij[i] * q + j;
          v[i * q + j]   = (j == s ? vaij[i] : 0);
        }
      }
    } else if (T) {
      /* each AIJ nonzero is scaled by row s of T (T stored column-major, hence s + j*p) */
      for (i = 0; i < nzaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = colsaij[i] * q + j;
          v[i * q + j]   = vaij[i] * T[s + j * p];
        }
      }
    }
    if (S) {
      /* add row s of S into slot c (+= merges with an existing diagonal T entry) */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = r * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1249 
MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1250 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1251 {
1252   PetscFunctionBegin;
1253   PetscCall(PetscFree2(*idx, *v));
1254   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1255   PetscFunctionReturn(PETSC_SUCCESS);
1256 }
1257 
/*
  MatGetRow_MPIKAIJ - Return one (global-numbered) local row of the expanded MPIKAIJ matrix.

  Works like MatGetRow_SeqKAIJ but draws entries from both the on-process (MatAIJ)
  and off-process (MatOAIJ) parts of the underlying parallel AIJ matrix; off-process
  column indices are mapped to global indices through garray.

  Only rows owned by this process ([rstart, rend)) may be requested.  The cols/values
  arrays are allocated here with PetscMalloc2 and freed by MatRestoreRow_MPIKAIJ().
*/
static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
{
  Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
  Mat            AIJ  = b->A;
  PetscBool      diag = PETSC_FALSE;
  Mat            MatAIJ, MatOAIJ;
  const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
  PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
  PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
  MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
  PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* no S, no T, and T is not the identity: the matrix is identically zero */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  r = lrow / p; /* block row within the local AIJ matrix */
  s = lrow % p; /* row within the dense p x q block */

  if (T || b->isTI) {
    PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); /* local -> global map for off-process columns */
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));

    c = ncolsaij + ncolsoaij; /* default slot for S: append after all T entries */
    for (i = 0; i < ncolsaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c    = i; /* S merges into the existing diagonal block instead */
      }
    }
  } else c = 0;

  /* calculate size of row */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz, &idx, nz, &v));
    for (i = 0; i < q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* T is the identity: only the s-th entry of each q-wide group is nonzero */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; /* shift to global block column */
          v[i * q + j]   = (j == s ? vals[i] : 0.0);
        }
      }
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; /* garray gives the global block column */
          v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
        }
      }
    } else if (T) {
      /* scale each AIJ nonzero by row s of T (T stored column-major, hence s + j*p) */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
          v[i * q + j]   = vals[i] * T[s + j * p];
        }
      }
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
          v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
        }
      }
    }
    if (S) {
      /* add row s of S into slot c (+= merges with an existing diagonal T entry) */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = (r + rstart / p) * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1353 
MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1354 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1355 {
1356   PetscFunctionBegin;
1357   PetscCall(PetscFree2(*idx, *v));
1358   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1359   PetscFunctionReturn(PETSC_SUCCESS);
1360 }
1361 
MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat * newmat)1362 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1363 {
1364   Mat A;
1365 
1366   PetscFunctionBegin;
1367   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1368   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1369   PetscCall(MatDestroy(&A));
1370   PetscFunctionReturn(PETSC_SUCCESS);
1371 }
1372 
1373 /*@C
1374   MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.
1375 
1376   Collective
1377 
1378   Input Parameters:
1379 + A - the `MATAIJ` matrix
1380 . p - number of rows in `S` and `T`
1381 . q - number of columns in `S` and `T`
1382 . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1383 - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1384 
1385   Output Parameter:
1386 . kaij - the new `MATKAIJ` matrix
1387 
1388   Level: advanced
1389 
1390   Notes:
1391   The created matrix is of the following form\:
1392 .vb
1393     [I \otimes S + A \otimes T]
1394 .ve
1395   where
1396 .vb
1397   S is a dense (p \times q) matrix
1398   T is a dense (p \times q) matrix
1399   A is a `MATAIJ`  (n \times n) matrix
1400   I is the identity matrix
1401 .ve
1402   The resulting matrix is (np \times nq)
1403 
1404   `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
1405   column-major format.
1406 
1407   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1408 
1409   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1410 
1411   Developer Notes:
1412   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1413   of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1414   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1415   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1416 
1417 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1418 @*/
MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat * kaij)1419 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1420 {
1421   PetscFunctionBegin;
1422   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1423   PetscCall(MatSetType(*kaij, MATKAIJ));
1424   PetscCall(MatKAIJSetAIJ(*kaij, A));
1425   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1426   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1427   PetscCall(MatSetUp(*kaij));
1428   PetscFunctionReturn(PETSC_SUCCESS);
1429 }
1430 
1431 /*MC
1432   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1433     [I \otimes S + A \otimes T],
1434   where
1435 .vb
1436     S is a dense (p \times q) matrix,
1437     T is a dense (p \times q) matrix,
1438     A is an AIJ  (n \times n) matrix,
1439     and I is the identity matrix.
1440 .ve
1441   The resulting matrix is (np \times nq).
1442 
1443   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1444 
1445   Level: advanced
1446 
1447   Note:
1448   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1449   where x and b are column vectors containing the row-major representations of X and B.
1450 
1451 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1452 M*/
1453 
MatCreate_KAIJ(Mat A)1454 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1455 {
1456   Mat_MPIKAIJ *b;
1457   PetscMPIInt  size;
1458 
1459   PetscFunctionBegin;
1460   PetscCall(PetscNew(&b));
1461   A->data = (void *)b;
1462 
1463   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1464 
1465   b->w = NULL;
1466   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1467   if (size == 1) {
1468     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1469     A->ops->destroy             = MatDestroy_SeqKAIJ;
1470     A->ops->mult                = MatMult_SeqKAIJ;
1471     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1472     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1473     A->ops->getrow              = MatGetRow_SeqKAIJ;
1474     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1475     A->ops->sor                 = MatSOR_SeqKAIJ;
1476     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1477   } else {
1478     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1479     A->ops->destroy             = MatDestroy_MPIKAIJ;
1480     A->ops->mult                = MatMult_MPIKAIJ;
1481     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1482     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1483     A->ops->getrow              = MatGetRow_MPIKAIJ;
1484     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1485     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1486     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1487   }
1488   A->ops->setup           = MatSetUp_KAIJ;
1489   A->ops->view            = MatView_KAIJ;
1490   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1491   PetscFunctionReturn(PETSC_SUCCESS);
1492 }
1493