xref: /petsc/src/mat/impls/kaij/kaij.c (revision ae51ba108530e1a8d2aeb8bc3771a24c2fcd801e)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
32 
33    Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
34 
35    Input Parameter:
36 .  A - the `MATKAIJ` matrix
37 
38    Output Parameter:
39 .  B - the `MATAIJ` matrix
40 
41    Level: advanced
42 
43    Note:
44    The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
45 
46 .seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
47 @*/
48 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
49 {
50   PetscBool ismpikaij, isseqkaij;
51 
52   PetscFunctionBegin;
53   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
54   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(PETSC_SUCCESS);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the `MATKAIJ` matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note:
81    All output parameters are optional (pass NULL if not desired)
82 
83    Level: advanced
84 
85 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
86 @*/
87 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
88 {
89   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
90   PetscFunctionBegin;
91   if (m) *m = b->p;
92   if (n) *n = b->q;
93   if (S) *S = b->S;
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 /*@C
98    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix
99 
100    Not Collective; the entire S is stored and returned independently on all processes.
101 
102    Input Parameter:
103 .  A - the `MATKAIJ` matrix
104 
105    Output Parameters:
106 +  m - the number of rows in S
107 .  n - the number of columns in S
108 -  S - the S matrix, in form of a scalar array in column-major format
109 
110    Note:
111    All output parameters are optional (pass NULL if not desired)
112 
113    Level: advanced
114 
115 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116 @*/
117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
118 {
119   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
120   PetscFunctionBegin;
121   if (m) *m = b->p;
122   if (n) *n = b->q;
123   if (S) *S = b->S;
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 /*@C
128   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
129 
130   Not collective
131 
132   Input Parameter:
133 . A - the `MATKAIJ` matrix
134 
135   Output Parameter:
136 . S - location of pointer to array obtained with `MatKAIJGetS()`
137 
138   Note:
139   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
140   If NULL is passed, it will not attempt to zero the array pointer.
141 
142   Level: advanced
143 
144 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
145 @*/
146 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
147 {
148   PetscFunctionBegin;
149   if (S) *S = NULL;
150   PetscCall(PetscObjectStateIncrease((PetscObject)A));
151   PetscFunctionReturn(PETSC_SUCCESS);
152 }
153 
/*@C
  MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`

  Not collective

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. S - location of pointer to array obtained with `MatKAIJGetSRead()`

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
@*/
PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
{
  PetscFunctionBegin;
  /* read-only access: no PetscObjectStateIncrease() needed, unlike MatKAIJRestoreS() */
  if (S) *S = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
179 
180 /*@C
181    MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix
182 
183    Not Collective; the entire T is stored and returned independently on all processes
184 
185    Input Parameter:
186 .  A - the `MATKAIJ` matrix
187 
188    Output Parameters:
189 +  m - the number of rows in T
190 .  n - the number of columns in T
191 -  T - the T matrix, in form of a scalar array in column-major format
192 
193    Note:
194    All output parameters are optional (pass NULL if not desired)
195 
196    Level: advanced
197 
198 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
199 @*/
200 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
201 {
202   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
203   PetscFunctionBegin;
204   if (m) *m = b->p;
205   if (n) *n = b->q;
206   if (T) *T = b->T;
207   PetscFunctionReturn(PETSC_SUCCESS);
208 }
209 
210 /*@C
211    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix
212 
213    Not Collective; the entire T is stored and returned independently on all processes
214 
215    Input Parameter:
216 .  A - the `MATKAIJ` matrix
217 
218    Output Parameters:
219 +  m - the number of rows in T
220 .  n - the number of columns in T
221 -  T - the T matrix, in form of a scalar array in column-major format
222 
223    Note: All output parameters are optional (pass NULL if not desired)
224 
225    Level: advanced
226 
227 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
228 @*/
229 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
230 {
231   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
232   PetscFunctionBegin;
233   if (m) *m = b->p;
234   if (n) *n = b->q;
235   if (T) *T = b->T;
236   PetscFunctionReturn(PETSC_SUCCESS);
237 }
238 
/*@C
  MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`

  Not collective

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. T - location of pointer to array obtained with `MatKAIJGetT()`

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
{
  PetscFunctionBegin;
  if (T) *T = NULL;
  /* the caller had write access, so the matrix contents may have changed */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
265 
/*@C
  MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`

  Not collective

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. T - location of pointer to array obtained with `MatKAIJGetTRead()`

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
{
  PetscFunctionBegin;
  /* read-only access: no PetscObjectStateIncrease() needed, unlike MatKAIJRestoreT() */
  if (T) *T = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
291 
292 /*@
293    MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
294 
295    Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
296 
297    Input Parameters:
298 +  A - the `MATKAIJ` matrix
299 -  B - the `MATAIJ` matrix
300 
301    Notes:
302    This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
303 
304    Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
305 
306    Level: advanced
307 
308 .seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
309 @*/
310 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
311 {
312   PetscMPIInt size;
313   PetscBool   flg;
314 
315   PetscFunctionBegin;
316   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
317   if (size == 1) {
318     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
319     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
320     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
321     a->AIJ         = B;
322   } else {
323     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
324     a->A           = B;
325   }
326   PetscCall(PetscObjectReference((PetscObject)B));
327   PetscFunctionReturn(PETSC_SUCCESS);
328 }
329 
330 /*@C
331    MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix
332 
333    Logically Collective; the entire S is stored independently on all processes.
334 
335    Input Parameters:
336 +  A - the `MATKAIJ` matrix
337 .  p - the number of rows in S
338 .  q - the number of columns in S
339 -  S - the S matrix, in form of a scalar array in column-major format
340 
341    Notes:
342    The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix.
343 
344    The S matrix is copied, so the user can destroy this array.
345 
346    Level: advanced
347 
348 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
349 @*/
350 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
351 {
352   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(PetscFree(a->S));
356   if (S) {
357     PetscCall(PetscMalloc1(p * q, &a->S));
358     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
359   } else a->S = NULL;
360 
361   a->p = p;
362   a->q = q;
363   PetscFunctionReturn(PETSC_SUCCESS);
364 }
365 
/*@C
   MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.

   Logically Collective.

   Input Parameter:
.  A - the `MATKAIJ` matrix

  Output Parameter:
.  identity - the Boolean value

   Level: Advanced

.seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
@*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  PetscInt     i, j;

  PetscFunctionBegin;
  /* non-square blocks can never be scaled identities */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  } else *identity = PETSC_TRUE;
  /* If T is the stored identity (isTI) and S is absent there is nothing to check.
     Otherwise scan every entry of whichever of S/T is actually stored; the loops
     short-circuit via the "*identity" condition as soon as a violation is found. */
  if (!a->isTI || a->S) {
    for (i = 0; i < a->p && *identity; i++) {
      for (j = 0; j < a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries must vanish to within PETSC_SMALL */
          if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* all diagonal entries must equal the first one (a single scaling factor);
             i*(p+1) indexes the i-th diagonal entry of a column-major square array */
          if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
406 
407 /*@C
408    MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix
409 
410    Logically Collective; the entire T is stored independently on all processes.
411 
412    Input Parameters:
413 +  A - the `MATKAIJ` matrix
414 .  p - the number of rows in S
415 .  q - the number of columns in S
416 -  T - the T matrix, in form of a scalar array in column-major format
417 
418    Notes:
419    The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix.
420 
421    The T matrix is copied, so the user can destroy this array.
422 
423    Level: Advanced
424 
425 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
426 @*/
427 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
428 {
429   PetscInt     i, j;
430   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
431   PetscBool    isTI = PETSC_FALSE;
432 
433   PetscFunctionBegin;
434   /* check if T is an identity matrix */
435   if (T && (p == q)) {
436     isTI = PETSC_TRUE;
437     for (i = 0; i < p; i++) {
438       for (j = 0; j < q; j++) {
439         if (i == j) {
440           /* diagonal term must be 1 */
441           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
442         } else {
443           /* off-diagonal term must be 0 */
444           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
445         }
446       }
447     }
448   }
449   a->isTI = isTI;
450 
451   PetscCall(PetscFree(a->T));
452   if (T && (!isTI)) {
453     PetscCall(PetscMalloc1(p * q, &a->T));
454     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
455   } else a->T = NULL;
456 
457   a->p = p;
458   a->q = q;
459   PetscFunctionReturn(PETSC_SUCCESS);
460 }
461 
462 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
463 {
464   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
465 
466   PetscFunctionBegin;
467   PetscCall(MatDestroy(&b->AIJ));
468   PetscCall(PetscFree(b->S));
469   PetscCall(PetscFree(b->T));
470   PetscCall(PetscFree(b->ibdiag));
471   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
472   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
473   PetscCall(PetscFree(A->data));
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
478 {
479   Mat_MPIKAIJ     *a;
480   Mat_MPIAIJ      *mpiaij;
481   PetscScalar     *T;
482   PetscInt         i, j;
483   PetscObjectState state;
484 
485   PetscFunctionBegin;
486   a      = (Mat_MPIKAIJ *)A->data;
487   mpiaij = (Mat_MPIAIJ *)a->A->data;
488 
489   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
490   if (state == a->state) {
491     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
492     PetscFunctionReturn(PETSC_SUCCESS);
493   } else {
494     PetscCall(MatDestroy(&a->AIJ));
495     PetscCall(MatDestroy(&a->OAIJ));
496     if (a->isTI) {
497       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
498        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
499        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
500        * to pass in. */
501       PetscCall(PetscMalloc1(a->p * a->q, &T));
502       for (i = 0; i < a->p; i++) {
503         for (j = 0; j < a->q; j++) {
504           if (i == j) T[i + j * a->p] = 1.0;
505           else T[i + j * a->p] = 0.0;
506         }
507       }
508     } else T = a->T;
509     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
510     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
511     if (a->isTI) PetscCall(PetscFree(T));
512     a->state = state;
513   }
514 
515   PetscFunctionReturn(PETSC_SUCCESS);
516 }
517 
/* Set up a KAIJ matrix: derive the global/local sizes from the embedded AIJ matrix and
 * the block dimensions p,q; in parallel additionally build the sequential on-/off-diagonal
 * KAIJ pieces and the scatter used to gather ghost blocks for MatMult. */
static PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt     n;
  PetscMPIInt  size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* sequential: each AIJ row expands by p and each column by q */
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS           from, to;
    Vec          gvec;

    /* NOTE: Mat_MPIKAIJ and Mat_SeqKAIJ share their leading p/q layout here, so the
     * seqkaij pointer above is used for the block sizes in this branch as well */
    a      = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    /* (re)build the sequential KAIJ matrices wrapping the diagonal/off-diagonal AIJ parts */
    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* local work vector holding the q-sized ghost blocks received from other ranks */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
572 
/* View a KAIJ matrix. For ASCII INFO/INFO_DETAIL/IMPL formats, print S, T, and then the
 * embedded AIJ matrix; for all other formats, convert to AIJ and view the conversion. */
static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S: entry-by-entry only at IMPL detail level. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T: the identity case is stored as a flag, not an array. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatView(B, viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
636 
637 static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
638 {
639   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
640 
641   PetscFunctionBegin;
642   PetscCall(MatDestroy(&b->AIJ));
643   PetscCall(MatDestroy(&b->OAIJ));
644   PetscCall(MatDestroy(&b->A));
645   PetscCall(VecScatterDestroy(&b->ctx));
646   PetscCall(VecDestroy(&b->w));
647   PetscCall(PetscFree(b->S));
648   PetscCall(PetscFree(b->T));
649   PetscCall(PetscFree(b->ibdiag));
650   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
651   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
652   PetscCall(PetscFree(A->data));
653   PetscFunctionReturn(PETSC_SUCCESS);
654 }
655 
656 /* --------------------------------------------------------------------------------------*/
657 
/* zz = yy + Axx, where A = (I \otimes S) + (AIJ \otimes T).
 * With yy == NULL this computes zz = Axx (used by MatMult_SeqKAIJ). */
static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x, *v, *bx;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

  PetscFunctionBegin;
  /* seed zz with yy, or zero when no addend was supplied */
  if (!yy) {
    PetscCall(VecSet(zz, 0.0));
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  /* no S, no T, and T not the identity: the operator contributes nothing */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j; /* AIJ column indices */
  v   = a->a; /* AIJ nonzero values */
  ii  = a->i; /* AIJ row offsets */

  /* (AIJ \otimes T) x contribution */
  if (b->isTI) {
    /* T is the identity: each AIJ entry directly scales the corresponding q-block of x */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i; /* p-block of the output for row i */
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
      }
    }
    PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
  } else if (t) {
    /* dense T: apply the p x q block (scaled by the AIJ entry) to each q-block of x */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) {
          for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
  }
  /* (I \otimes S) x contribution */
  if (s) {
    for (i = 0; i < m; i++) {
      sums = y + p * i;
      bx   = x + q * i;
      /* guard for rectangular AIJ: only rows with a matching column block get the S term */
      if (i < b->AIJ->cmap->n) {
        for (j = 0; j < q; j++) {
          for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
        }
      }
    }
    PetscCall(PetscLogFlops(2.0 * m * p * q));
  }

  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
727 
728 static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
729 {
730   PetscFunctionBegin;
731   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
732   PetscFunctionReturn(PETSC_SUCCESS);
733 }
734 
735 #include <petsc/private/kernels/blockinvert.h>
736 
/* Compute (and cache in b->ibdiag) the inverses of the p x p diagonal blocks
 * S + a_ii * T of the KAIJ matrix; optionally return the cached array. */
static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt           i, j, *v_pivots, dof, dof2;
  PetscScalar       *diag, aval, *v_work;

  PetscFunctionBegin;
  PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof * dof;

  /* reuse the cached inverse when nothing has invalidated it */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!b->ibdiag) { PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  /* scratch for the dense in-place inversion kernel */
  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* start the block from S (or zero when S is absent) */
    if (S) {
      PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
    }
    if (b->isTI) {
      /* T is the identity: add the AIJ diagonal entry a_ii (0 when row i has no diagonal) to the block diagonal */
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      /* general T: add a_ii * T to the whole block */
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    /* invert the assembled dof x dof block in place */
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
789 
790 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
791 {
792   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
793 
794   PetscFunctionBegin;
795   *B = kaij->AIJ;
796   PetscFunctionReturn(PETSC_SUCCESS);
797 }
798 
/* Convert a (Seq/MPI) KAIJ matrix to an ordinary AIJ matrix.
 * Unless reusing an existing target, a new AIJ matrix is preallocated first:
 * each AIJ nonzero in row i contributes q columns to each of the p expanded rows,
 * and rows at/after the first missing diagonal (d) get q extra columns when S is
 * present, because S adds diagonal blocks not represented in the AIJ sparsity.
 * The actual value copy is delegated to MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
  Mat            AIJ, OAIJ, B;
  PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt p = a->p, q = a->q;
  PetscBool      ismpikaij, missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    /* pick the underlying sequential AIJ parts: diagonal (AIJ) and, in parallel, off-diagonal (OAIJ) */
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
      OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ  = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));
    PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
    if (!missing || !a->S) d = m; /* no extra diagonal columns needed: diagonal present everywhere, or no S */
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      /* (i >= d) adds q columns for the S diagonal block in rows whose AIJ diagonal is missing */
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      /* off-diagonal part: plain expansion, S only affects diagonal blocks */
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
850 
851 static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
852 {
853   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
854   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
855   const PetscScalar *aa = a->a, *T = kaij->T, *v;
856   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
857   const PetscScalar *b, *xb, *idiag;
858   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
859   PetscInt           i, j, k, i2, bs, bs2, nz;
860 
861   PetscFunctionBegin;
862   its = its * lits;
863   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
864   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
865   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
866   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
867   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
868   bs  = p;
869   bs2 = bs * bs;
870 
871   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
872 
873   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
874   idiag = kaij->ibdiag;
875   diag  = a->diag;
876 
877   if (!kaij->sor.setup) {
878     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
879     kaij->sor.setup = PETSC_TRUE;
880   }
881   y    = kaij->sor.y;
882   w    = kaij->sor.w;
883   work = kaij->sor.work;
884   t    = kaij->sor.t;
885   arr  = kaij->sor.arr;
886 
887   PetscCall(VecGetArray(xx, &x));
888   PetscCall(VecGetArrayRead(bb, &b));
889 
890   if (flag & SOR_ZERO_INITIAL_GUESS) {
891     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
892       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
893       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
894       i2 = bs;
895       idiag += bs2;
896       for (i = 1; i < m; i++) {
897         v  = aa + ai[i];
898         vi = aj + ai[i];
899         nz = diag[i] - ai[i];
900 
901         if (T) { /* b - T (Arow * x) */
902           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
903           for (j = 0; j < nz; j++) {
904             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
905           }
906           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
907           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
908         } else if (kaij->isTI) {
909           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
910           for (j = 0; j < nz; j++) {
911             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
912           }
913         } else {
914           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
915         }
916 
917         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
918         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
919 
920         idiag += bs2;
921         i2 += bs;
922       }
923       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
924       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
925       xb = t;
926     } else xb = b;
927     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
928       idiag = kaij->ibdiag + bs2 * (m - 1);
929       i2    = bs * (m - 1);
930       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
931       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
932       i2 -= bs;
933       idiag -= bs2;
934       for (i = m - 2; i >= 0; i--) {
935         v  = aa + diag[i] + 1;
936         vi = aj + diag[i] + 1;
937         nz = ai[i + 1] - diag[i] - 1;
938 
939         if (T) { /* FIXME: This branch untested */
940           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
941           /* copy all rows of x that are needed into contiguous space */
942           workt = work;
943           for (j = 0; j < nz; j++) {
944             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
945             workt += bs;
946           }
947           arrt = arr;
948           for (j = 0; j < nz; j++) {
949             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
950             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
951             arrt += bs2;
952           }
953           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
954         } else if (kaij->isTI) {
955           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
956           for (j = 0; j < nz; j++) {
957             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
958           }
959         }
960 
961         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
962         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
963 
964         idiag -= bs2;
965         i2 -= bs;
966       }
967       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
968     }
969     its--;
970   }
971   while (its--) { /* FIXME: This branch not updated */
972     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
973       i2    = 0;
974       idiag = kaij->ibdiag;
975       for (i = 0; i < m; i++) {
976         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
977 
978         v     = aa + ai[i];
979         vi    = aj + ai[i];
980         nz    = diag[i] - ai[i];
981         workt = work;
982         for (j = 0; j < nz; j++) {
983           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
984           workt += bs;
985         }
986         arrt = arr;
987         if (T) {
988           for (j = 0; j < nz; j++) {
989             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
990             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
991             arrt += bs2;
992           }
993           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
994         } else if (kaij->isTI) {
995           for (j = 0; j < nz; j++) {
996             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
997             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
998             arrt += bs2;
999           }
1000           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1001         }
1002         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
1003 
1004         v     = aa + diag[i] + 1;
1005         vi    = aj + diag[i] + 1;
1006         nz    = ai[i + 1] - diag[i] - 1;
1007         workt = work;
1008         for (j = 0; j < nz; j++) {
1009           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1010           workt += bs;
1011         }
1012         arrt = arr;
1013         if (T) {
1014           for (j = 0; j < nz; j++) {
1015             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1016             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1017             arrt += bs2;
1018           }
1019           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1020         } else if (kaij->isTI) {
1021           for (j = 0; j < nz; j++) {
1022             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1023             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1024             arrt += bs2;
1025           }
1026           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1027         }
1028 
1029         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1030         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1031 
1032         idiag += bs2;
1033         i2 += bs;
1034       }
1035       xb = t;
1036     } else xb = b;
1037     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1038       idiag = kaij->ibdiag + bs2 * (m - 1);
1039       i2    = bs * (m - 1);
1040       if (xb == b) {
1041         for (i = m - 1; i >= 0; i--) {
1042           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1043 
1044           v     = aa + ai[i];
1045           vi    = aj + ai[i];
1046           nz    = diag[i] - ai[i];
1047           workt = work;
1048           for (j = 0; j < nz; j++) {
1049             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1050             workt += bs;
1051           }
1052           arrt = arr;
1053           if (T) {
1054             for (j = 0; j < nz; j++) {
1055               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1056               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1057               arrt += bs2;
1058             }
1059             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1060           } else if (kaij->isTI) {
1061             for (j = 0; j < nz; j++) {
1062               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1063               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1064               arrt += bs2;
1065             }
1066             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1067           }
1068 
1069           v     = aa + diag[i] + 1;
1070           vi    = aj + diag[i] + 1;
1071           nz    = ai[i + 1] - diag[i] - 1;
1072           workt = work;
1073           for (j = 0; j < nz; j++) {
1074             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1075             workt += bs;
1076           }
1077           arrt = arr;
1078           if (T) {
1079             for (j = 0; j < nz; j++) {
1080               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1081               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1082               arrt += bs2;
1083             }
1084             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1085           } else if (kaij->isTI) {
1086             for (j = 0; j < nz; j++) {
1087               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1088               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1089               arrt += bs2;
1090             }
1091             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1092           }
1093 
1094           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1095           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1096         }
1097       } else {
1098         for (i = m - 1; i >= 0; i--) {
1099           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1100           v     = aa + diag[i] + 1;
1101           vi    = aj + diag[i] + 1;
1102           nz    = ai[i + 1] - diag[i] - 1;
1103           workt = work;
1104           for (j = 0; j < nz; j++) {
1105             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1106             workt += bs;
1107           }
1108           arrt = arr;
1109           if (T) {
1110             for (j = 0; j < nz; j++) {
1111               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1112               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1113               arrt += bs2;
1114             }
1115             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1116           } else if (kaij->isTI) {
1117             for (j = 0; j < nz; j++) {
1118               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1119               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1120               arrt += bs2;
1121             }
1122             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1123           }
1124           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1125           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1126         }
1127       }
1128       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1129     }
1130   }
1131 
1132   PetscCall(VecRestoreArray(xx, &x));
1133   PetscCall(VecRestoreArrayRead(bb, &b));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
1137 /*===================================================================================*/
1138 
1139 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1140 {
1141   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1142 
1143   PetscFunctionBegin;
1144   if (!yy) {
1145     PetscCall(VecSet(zz, 0.0));
1146   } else {
1147     PetscCall(VecCopy(yy, zz));
1148   }
1149   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1150   /* start the scatter */
1151   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1152   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1153   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1154   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1155   PetscFunctionReturn(PETSC_SUCCESS);
1156 }
1157 
/* yy <- A*xx for a parallel KAIJ matrix: delegate to MultAdd with a NULL
   addend, which zeroes yy before accumulating the product. */
static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1164 
/* Invert the dense diagonal blocks of a parallel KAIJ matrix. Only the local
   diagonal part (b->AIJ, itself a SEQKAIJ matrix) contributes to the block
   diagonal, so the work is delegated to its invertblockdiagonal op. */
static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
  PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1174 
1175 /* ----------------------------------------------------------------*/
1176 
1177 static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1178 {
1179   Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
1180   PetscBool    diag = PETSC_FALSE;
1181   PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1182   PetscScalar *vaij, *v, *S = b->S, *T = b->T;
1183 
1184   PetscFunctionBegin;
1185   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1186   b->getrowactive = PETSC_TRUE;
1187   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1188 
1189   if ((!S) && (!T) && (!b->isTI)) {
1190     if (ncols) *ncols = 0;
1191     if (cols) *cols = NULL;
1192     if (values) *values = NULL;
1193     PetscFunctionReturn(PETSC_SUCCESS);
1194   }
1195 
1196   if (T || b->isTI) {
1197     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1198     c = nzaij;
1199     for (i = 0; i < nzaij; i++) {
1200       /* check if this row contains a diagonal entry */
1201       if (colsaij[i] == r) {
1202         diag = PETSC_TRUE;
1203         c    = i;
1204       }
1205     }
1206   } else nzaij = c = 0;
1207 
1208   /* calculate size of row */
1209   nz = 0;
1210   if (S) nz += q;
1211   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1212 
1213   if (cols || values) {
1214     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1215     for (i = 0; i < q; i++) {
1216       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1217       v[i] = 0.0;
1218     }
1219     if (b->isTI) {
1220       for (i = 0; i < nzaij; i++) {
1221         for (j = 0; j < q; j++) {
1222           idx[i * q + j] = colsaij[i] * q + j;
1223           v[i * q + j]   = (j == s ? vaij[i] : 0);
1224         }
1225       }
1226     } else if (T) {
1227       for (i = 0; i < nzaij; i++) {
1228         for (j = 0; j < q; j++) {
1229           idx[i * q + j] = colsaij[i] * q + j;
1230           v[i * q + j]   = vaij[i] * T[s + j * p];
1231         }
1232       }
1233     }
1234     if (S) {
1235       for (j = 0; j < q; j++) {
1236         idx[c * q + j] = r * q + j;
1237         v[c * q + j] += S[s + j * p];
1238       }
1239     }
1240   }
1241 
1242   if (ncols) *ncols = nz;
1243   if (cols) *cols = idx;
1244   if (values) *values = v;
1245   PetscFunctionReturn(PETSC_SUCCESS);
1246 }
1247 
/* Release the column-index and value arrays handed out by MatGetRow_SeqKAIJ().
   The two arrays come from a single PetscMalloc2 and must be freed together.
   NOTE(review): this dereferences idx and v unconditionally -- assumes callers
   that requested neither cols nor values do not call restore with NULL
   pointers; confirm against MatRestoreRow() usage. */
static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  PetscFunctionBegin;
  if (nz) *nz = 0;
  PetscCall(PetscFree2(*idx, *v));
  ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1256 
1257 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1258 {
1259   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1260   Mat            AIJ  = b->A;
1261   PetscBool      diag = PETSC_FALSE;
1262   Mat            MatAIJ, MatOAIJ;
1263   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1264   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1265   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;
1266 
1267   PetscFunctionBegin;
1268   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1269   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1270   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1271   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1272   b->getrowactive = PETSC_TRUE;
1273   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1274   lrow = row - rstart;
1275 
1276   if ((!S) && (!T) && (!b->isTI)) {
1277     if (ncols) *ncols = 0;
1278     if (cols) *cols = NULL;
1279     if (values) *values = NULL;
1280     PetscFunctionReturn(PETSC_SUCCESS);
1281   }
1282 
1283   r = lrow / p;
1284   s = lrow % p;
1285 
1286   if (T || b->isTI) {
1287     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1288     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1289     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1290 
1291     c = ncolsaij + ncolsoaij;
1292     for (i = 0; i < ncolsaij; i++) {
1293       /* check if this row contains a diagonal entry */
1294       if (colsaij[i] == r) {
1295         diag = PETSC_TRUE;
1296         c    = i;
1297       }
1298     }
1299   } else c = 0;
1300 
1301   /* calculate size of row */
1302   nz = 0;
1303   if (S) nz += q;
1304   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1305 
1306   if (cols || values) {
1307     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1308     for (i = 0; i < q; i++) {
1309       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1310       v[i] = 0.0;
1311     }
1312     if (b->isTI) {
1313       for (i = 0; i < ncolsaij; i++) {
1314         for (j = 0; j < q; j++) {
1315           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1316           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1317         }
1318       }
1319       for (i = 0; i < ncolsoaij; i++) {
1320         for (j = 0; j < q; j++) {
1321           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1322           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1323         }
1324       }
1325     } else if (T) {
1326       for (i = 0; i < ncolsaij; i++) {
1327         for (j = 0; j < q; j++) {
1328           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1329           v[i * q + j]   = vals[i] * T[s + j * p];
1330         }
1331       }
1332       for (i = 0; i < ncolsoaij; i++) {
1333         for (j = 0; j < q; j++) {
1334           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1335           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1336         }
1337       }
1338     }
1339     if (S) {
1340       for (j = 0; j < q; j++) {
1341         idx[c * q + j] = (r + rstart / p) * q + j;
1342         v[c * q + j] += S[s + j * p];
1343       }
1344     }
1345   }
1346 
1347   if (ncols) *ncols = nz;
1348   if (cols) *cols = idx;
1349   if (values) *values = v;
1350   PetscFunctionReturn(PETSC_SUCCESS);
1351 }
1352 
1353 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1354 {
1355   PetscFunctionBegin;
1356   PetscCall(PetscFree2(*idx, *v));
1357   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1358   PetscFunctionReturn(PETSC_SUCCESS);
1359 }
1360 
1361 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1362 {
1363   Mat A;
1364 
1365   PetscFunctionBegin;
1366   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1367   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1368   PetscCall(MatDestroy(&A));
1369   PetscFunctionReturn(PETSC_SUCCESS);
1370 }
1371 
1372 /* ---------------------------------------------------------------------------------- */
1373 /*@C
1374   MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form:
1375 
1376     [I \otimes S + A \otimes T]
1377 
1378   where
1379     S is a dense (p \times q) matrix
1380     T is a dense (p \times q) matrix
1381     A is an `MATAIJ`  (n \times n) matrix
1382     I is the identity matrix
1383   The resulting matrix is (np \times nq)
1384 
1385   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1386 
1387   Collective
1388 
1389   Input Parameters:
1390 + A - the `MATAIJ` matrix
1391 . p - number of rows in S and T
1392 . q - number of columns in S and T
1393 . S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1394 - T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1395 
1396   Output Parameter:
1397 . kaij - the new `MATKAIJ` matrix
1398 
1399   Notes:
1400   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1401 
1402   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1403 
1404   Developer Note:
1405   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1406   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1407   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1408   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1409 
1410   Level: advanced
1411 
1412 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1413 @*/
PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
{
  PetscFunctionBegin;
  /* Create the shell on A's communicator; MatSetType() selects the seq or mpi
     variant based on the communicator size. */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
  PetscCall(MatSetType(*kaij, MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij, A));   /* takes a reference on A */
  PetscCall(MatKAIJSetS(*kaij, p, q, S)); /* S, T copied; NULL means zero matrix (or identity for T via MatKAIJSetT) */
  PetscCall(MatKAIJSetT(*kaij, p, q, T));
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1425 
1426 /*MC
1427   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1428     [I \otimes S + A \otimes T],
1429   where
1430     S is a dense (p \times q) matrix,
1431     T is a dense (p \times q) matrix,
1432     A is an AIJ  (n \times n) matrix,
1433     and I is the identity matrix.
1434   The resulting matrix is (np \times nq).
1435 
1436   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1437 
1438   Note:
1439   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1440   where x and b are column vectors containing the row-major representations of X and B.
1441 
1442   Level: advanced
1443 
1444 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1445 M*/
1446 
/* Constructor registered for MATKAIJ: allocates the per-matrix context and
   wires up the operations table. On a one-process communicator the matrix
   becomes MATSEQKAIJ, otherwise MATMPIKAIJ; the two variants install
   different op implementations but share setup/view/createsubmatrix. */
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  /* Mat_MPIKAIJ is used for both variants; the seq code reads only the
     fields it shares with Mat_SeqKAIJ. */
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  /* Start from an empty ops table so unsupported operations fail cleanly. */
  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  b->w = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ; /* SOR only implemented sequentially */
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
  }
  /* Operations shared by both variants */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1486