xref: /petsc/src/mat/impls/kaij/kaij.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: `MatCreateKAIJ()`
46 @*/
47 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) {
48   PetscBool ismpikaij, isseqkaij;
49 
50   PetscFunctionBegin;
51   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
52   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
53   if (ismpikaij) {
54     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
55 
56     *B = b->A;
57   } else if (isseqkaij) {
58     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
59 
60     *B = b->AIJ;
61   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
62   PetscFunctionReturn(0);
63 }
64 
65 /*@C
66    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
67 
68    Not Collective; the entire S is stored and returned independently on all processes.
69 
70    Input Parameter:
71 .  A - the KAIJ matrix
72 
73    Output Parameters:
74 +  m - the number of rows in S
75 .  n - the number of columns in S
76 -  S - the S matrix, in form of a scalar array in column-major format
77 
78    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
79 
80    Level: advanced
81 
82 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
83 @*/
84 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) {
85   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
86   PetscFunctionBegin;
87   if (m) *m = b->p;
88   if (n) *n = b->q;
89   if (S) *S = b->S;
90   PetscFunctionReturn(0);
91 }
92 
93 /*@C
94    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
95 
96    Not Collective; the entire S is stored and returned independently on all processes.
97 
98    Input Parameter:
99 .  A - the KAIJ matrix
100 
101    Output Parameters:
102 +  m - the number of rows in S
103 .  n - the number of columns in S
104 -  S - the S matrix, in form of a scalar array in column-major format
105 
106    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
107 
108    Level: advanced
109 
110 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
111 @*/
112 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) {
113   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
114   PetscFunctionBegin;
115   if (m) *m = b->p;
116   if (n) *n = b->q;
117   if (S) *S = b->S;
118   PetscFunctionReturn(0);
119 }
120 
121 /*@C
122   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
123 
124   Not collective
125 
126   Input Parameter:
127 . A - the KAIJ matrix
128 
129   Output Parameter:
130 . S - location of pointer to array obtained with MatKAIJGetS()
131 
132   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
133   If NULL is passed, it will not attempt to zero the array pointer.
134 
135   Level: advanced
136 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
137 @*/
PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) {
  PetscFunctionBegin;
  if (S) *S = NULL; /* zero the caller's pointer so it cannot be reused after restore */
  /* The caller had writable access to S, so the matrix may have changed. */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
144 
145 /*@C
146   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
147 
148   Not collective
149 
150   Input Parameter:
151 . A - the KAIJ matrix
152 
153   Output Parameter:
. S - location of pointer to array obtained with MatKAIJGetSRead()
155 
156   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
157   If NULL is passed, it will not attempt to zero the array pointer.
158 
159   Level: advanced
160 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
161 @*/
PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) {
  PetscFunctionBegin;
  if (S) *S = NULL; /* zero the caller's pointer so it cannot be reused after restore */
  /* Read-only access: no object-state increase needed (unlike MatKAIJRestoreS). */
  PetscFunctionReturn(0);
}
167 
168 /*@C
169    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
170 
171    Not Collective; the entire T is stored and returned independently on all processes
172 
173    Input Parameter:
174 .  A - the KAIJ matrix
175 
176    Output Parameters:
177 +  m - the number of rows in T
178 .  n - the number of columns in T
179 -  T - the T matrix, in form of a scalar array in column-major format
180 
181    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
182 
183    Level: advanced
184 
185 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
186 @*/
187 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) {
188   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
189   PetscFunctionBegin;
190   if (m) *m = b->p;
191   if (n) *n = b->q;
192   if (T) *T = b->T;
193   PetscFunctionReturn(0);
194 }
195 
196 /*@C
197    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
198 
199    Not Collective; the entire T is stored and returned independently on all processes
200 
201    Input Parameter:
202 .  A - the KAIJ matrix
203 
204    Output Parameters:
205 +  m - the number of rows in T
206 .  n - the number of columns in T
207 -  T - the T matrix, in form of a scalar array in column-major format
208 
209    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
210 
211    Level: advanced
212 
213 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
214 @*/
215 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) {
216   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
217   PetscFunctionBegin;
218   if (m) *m = b->p;
219   if (n) *n = b->q;
220   if (T) *T = b->T;
221   PetscFunctionReturn(0);
222 }
223 
224 /*@C
225   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
226 
227   Not collective
228 
229   Input Parameter:
230 . A - the KAIJ matrix
231 
232   Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetT()
234 
235   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
236   If NULL is passed, it will not attempt to zero the array pointer.
237 
238   Level: advanced
239 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
240 @*/
PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) {
  PetscFunctionBegin;
  if (T) *T = NULL; /* zero the caller's pointer so it cannot be reused after restore */
  /* The caller had writable access to T, so the matrix may have changed. */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
247 
248 /*@C
249   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
250 
251   Not collective
252 
253   Input Parameter:
254 . A - the KAIJ matrix
255 
256   Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetTRead()
258 
259   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
260   If NULL is passed, it will not attempt to zero the array pointer.
261 
262   Level: advanced
263 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
264 @*/
PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) {
  PetscFunctionBegin;
  if (T) *T = NULL; /* zero the caller's pointer so it cannot be reused after restore */
  /* Read-only access: no object-state increase needed (unlike MatKAIJRestoreT). */
  PetscFunctionReturn(0);
}
270 
271 /*@
272    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
273 
274    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
275 
276    Input Parameters:
277 +  A - the KAIJ matrix
278 -  B - the AIJ matrix
279 
280    Notes:
281    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
282    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
283 
284    Level: advanced
285 
286 .seealso: `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
287 @*/
PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) {
  PetscMPIInt size;
  PetscBool   flg;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Sequential case: only MATSEQAIJ is accepted as the inner matrix. */
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
    Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
    a->AIJ         = B;
  } else {
    /* Parallel case: B is stored as-is; no type check is performed here. */
    Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
    a->A           = B;
  }
  /* Take a reference so the caller remains free to destroy B.
     NOTE(review): a previously set inner matrix is overwritten above without
     being dereferenced, so calling this twice on the same A would leak a
     reference — confirm callers only set the AIJ matrix once. */
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscFunctionReturn(0);
}
306 
307 /*@C
308    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
309 
310    Logically Collective; the entire S is stored independently on all processes.
311 
312    Input Parameters:
313 +  A - the KAIJ matrix
314 .  p - the number of rows in S
315 .  q - the number of columns in S
316 -  S - the S matrix, in form of a scalar array in column-major format
317 
318    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
319    The S matrix is copied, so the user can destroy this array.
320 
321    Level: Advanced
322 
323 .seealso: `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
324 @*/
325 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) {
326   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
327 
328   PetscFunctionBegin;
329   PetscCall(PetscFree(a->S));
330   if (S) {
331     PetscCall(PetscMalloc1(p * q, &a->S));
332     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
333   } else a->S = NULL;
334 
335   a->p = p;
336   a->q = q;
337   PetscFunctionReturn(0);
338 }
339 
340 /*@C
341    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
342 
343    Logically Collective.
344 
345    Input Parameter:
346 .  A - the KAIJ matrix
347 
348   Output Parameter:
349 .  identity - the Boolean value
350 
351    Level: Advanced
352 
353 .seealso: `MatKAIJGetS()`, `MatKAIJGetT()`
354 @*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) {
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  PetscInt     i, j;

  PetscFunctionBegin;
  /* Non-square S/T can never be scaled identities. */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(0);
  } else *identity = PETSC_TRUE;
  /* If T is the implicit identity (isTI) and S is NULL, the answer is trivially
     true; otherwise scan the dense arrays.  A matrix qualifies as a scaled
     identity when every off-diagonal entry is numerically zero and every
     diagonal entry matches the (0,0) entry, to within PETSC_SMALL. */
  if (!a->isTI || a->S) {
    for (i = 0; i < a->p && *identity; i++) {
      for (j = 0; j < a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries must vanish */
          if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* diagonal entries must all equal the first diagonal entry */
          if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
379 
380 /*@C
381    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
382 
383    Logically Collective; the entire T is stored independently on all processes.
384 
385    Input Parameters:
386 +  A - the KAIJ matrix
387 .  p - the number of rows in S
388 .  q - the number of columns in S
389 -  T - the T matrix, in form of a scalar array in column-major format
390 
391    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
392    The T matrix is copied, so the user can destroy this array.
393 
394    Level: Advanced
395 
396 .seealso: `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
397 @*/
398 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) {
399   PetscInt     i, j;
400   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
401   PetscBool    isTI = PETSC_FALSE;
402 
403   PetscFunctionBegin;
404   /* check if T is an identity matrix */
405   if (T && (p == q)) {
406     isTI = PETSC_TRUE;
407     for (i = 0; i < p; i++) {
408       for (j = 0; j < q; j++) {
409         if (i == j) {
410           /* diagonal term must be 1 */
411           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
412         } else {
413           /* off-diagonal term must be 0 */
414           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
415         }
416       }
417     }
418   }
419   a->isTI = isTI;
420 
421   PetscCall(PetscFree(a->T));
422   if (T && (!isTI)) {
423     PetscCall(PetscMalloc1(p * q, &a->T));
424     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
425   } else a->T = NULL;
426 
427   a->p = p;
428   a->q = q;
429   PetscFunctionReturn(0);
430 }
431 
PetscErrorCode MatDestroy_SeqKAIJ(Mat A) {
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  /* Release the inner AIJ matrix (reference taken when it was set) and all
     dense arrays and scratch storage owned by the SEQKAIJ implementation. */
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag)); /* lazily built inverted block diagonal */
  PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr)); /* SOR scratch buffers */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
445 
PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) {
  /* (Re)build the sequential KAIJ matrices a->AIJ and a->OAIJ from the
     diagonal and off-diagonal blocks of the underlying parallel AIJ matrix
     a->A.  The object state of a->A is cached in a->state so the rebuild is
     skipped when a->A has not changed since the last call. */
  Mat_MPIKAIJ     *a;
  Mat_MPIAIJ      *mpiaij;
  PetscScalar     *T;
  PetscInt         i, j;
  PetscObjectState state;

  PetscFunctionBegin;
  a      = (Mat_MPIKAIJ *)A->data;
  mpiaij = (Mat_MPIAIJ *)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
    PetscFunctionReturn(0);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p * a->q, &T));
      for (i = 0; i < a->p; i++) {
        for (j = 0; j < a->q; j++) {
          if (i == j) T[i + j * a->p] = 1.0;
          else T[i + j * a->p] = 0.0;
        }
      }
    } else T = a->T;
    /* The diagonal piece keeps the shift S; the off-diagonal piece gets no
       shift, since the I \otimes S term only touches on-process entries. */
    PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T)); /* temporary identity no longer needed */
    a->state = state;
  }

  PetscFunctionReturn(0);
}
485 
PetscErrorCode MatSetUp_KAIJ(Mat A) {
  /* Set the global/local sizes, block sizes, and layouts of the KAIJ matrix
     from the inner AIJ matrix; in parallel, additionally build the sequential
     KAIJ pieces and the VecScatter used to gather off-process input entries. */
  PetscInt     n;
  PetscMPIInt  size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Sequential: an n x n AIJ matrix yields a (p*n) x (q*n) KAIJ matrix. */
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS           from, to;
    Vec          gvec;

    a      = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    /* NOTE(review): the two calls below read seqkaij->p/q even though A->data
       is a Mat_MPIKAIJ in this branch; this relies on Mat_SeqKAIJ and
       Mat_MPIKAIJ storing p and q at the same offsets — confirm against kaij.h. */
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    /* build a->AIJ / a->OAIJ from the diagonal / off-diagonal parts of a->A */
    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* local work vector: q values for each ghosted column of the AIJ matrix */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary index sets for building the scatter/gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
539 
PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) {
  /* Viewer for both SEQKAIJ and MPIKAIJ.  For the ASCII INFO/INFO_DETAIL/IMPL
     formats the dense S and T arrays are printed and the inner AIJ matrix is
     viewed; for every other format the matrix is converted to AIJ and viewed
     through the AIJ viewer. */
  PetscViewerFormat format;
  Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      /* entries are stored (and printed) in column-major order */
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatView(B, viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(0);
}
602 
PetscErrorCode MatDestroy_MPIKAIJ(Mat A) {
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  /* Release the cached sequential pieces, the underlying parallel AIJ matrix,
     and the scatter machinery built in MatSetUp_KAIJ. */
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx)); /* off-process gather context */
  PetscCall(VecDestroy(&b->w));          /* local work vector for gathered values */
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
620 
621 /* --------------------------------------------------------------------------------------*/
622 
/* zz = yy + Axx, where A is the KAIJ matrix I \otimes S + AIJ \otimes T.
   Passing yy == NULL computes zz = Axx (used by MatMult_SeqKAIJ). */
PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x, *v, *bx;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

  PetscFunctionBegin;
  /* Initialize zz: zero for MatMult, a copy of yy for MatMultAdd. */
  if (!yy) {
    PetscCall(VecSet(zz, 0.0));
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  /* With S and T both absent (and T not the implicit identity) the KAIJ
     matrix is zero: nothing to add. */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j; /* CSR column indices of the inner AIJ matrix */
  v   = a->a; /* CSR values */
  ii  = a->i; /* CSR row pointers */

  if (b->isTI) {
    /* T is the identity: block row i accumulates sum_j A(i,j) * x_j with no
       dense multiply by T. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) { sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k]; }
      }
    }
    PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
  } else if (t) {
    /* General T: block row i accumulates sum_j A(i,j) * (T * x_j), with T
       stored column-major (entry (k,l) at t[k + l*p]). */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) {
          for (l = 0; l < q; l++) { sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l]; }
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
  }
  if (s) {
    /* Shift term (I \otimes S): block row i additionally gets S * x_i. */
    for (i = 0; i < m; i++) {
      sums = y + p * i;
      bx   = x + q * i;
      /* guard for rectangular cases where row blocks outnumber column blocks */
      if (i < b->AIJ->cmap->n) {
        for (j = 0; j < q; j++) {
          for (k = 0; k < p; k++) { sums[k] += s[k + j * p] * bx[j]; }
        }
      }
    }
    PetscCall(PetscLogFlops(2.0 * m * p * q));
  }

  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(0);
}
691 
692 PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) {
693   PetscFunctionBegin;
694   PetscCall(MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy));
695   PetscFunctionReturn(0);
696 }
697 
698 #include <petsc/private/kernels/blockinvert.h>
699 
PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) {
  /* Compute and cache (in b->ibdiag) the inverses of the p-by-p diagonal
     blocks S + A(i,i)*T, one per row of the inner AIJ matrix.  The cached
     result is reused while b->ibdiagvalid holds. */
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt           i, j, *v_pivots, dof, dof2;
  PetscScalar       *diag, aval, *v_work;

  PetscFunctionBegin;
  PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof * dof;

  /* Return the cached inverses when still valid. */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(0);
  }
  if (!b->ibdiag) {
    PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
    PetscCall(PetscLogObjectMemory((PetscObject)A, dof2 * m * sizeof(PetscScalar)));
  }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* Start the block from S (or zero when S is NULL)... */
    if (S) {
      PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
    }
    /* ...then add A(i,i)*T (A(i,i)*I when T is the implicit identity).
       The diagonal entry of AIJ row i is found by scanning; 0 if absent. */
    if (b->isTI) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    /* invert the dense block in place */
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(0);
}
754 
755 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) {
756   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
757 
758   PetscFunctionBegin;
759   *B = kaij->AIJ;
760   PetscFunctionReturn(0);
761 }
762 
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  /* Convert a (Seq or MPI) KAIJ matrix into an explicitly stored AIJ matrix.
     Unless reusing an existing matrix, a new AIJ matrix is created and
     preallocated from the nonzero pattern of the inner AIJ matrices;
     MatConvert_Basic() then fills in the values. */
  Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
  Mat            AIJ, OAIJ, B;
  PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt p = a->p, q = a->q;
  PetscBool      ismpikaij, missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
    if (ismpikaij) {
      /* parallel case: take the inner AIJ matrices of the diagonal (AIJ) and
         off-diagonal (OAIJ) sequential KAIJ pieces */
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
      OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ  = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));
    PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
    if (!missing || !a->S) d = m; /* no extra diagonal slots needed when nothing is missing or S is NULL */
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      /* each AIJ nonzero expands to a q-wide block; rows at/after the first
         missing diagonal (i >= d) get q extra slots for the S contribution */
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      /* off-diagonal part carries no S term, so just nz*q per expanded row */
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  /* copy the actual entries into B */
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else *newmat = B;
  PetscFunctionReturn(0);
}
813 
814 PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
815   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
816   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
817   const PetscScalar *aa = a->a, *T = kaij->T, *v;
818   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
819   const PetscScalar *b, *xb, *idiag;
820   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
821   PetscInt           i, j, k, i2, bs, bs2, nz;
822 
823   PetscFunctionBegin;
824   its = its * lits;
825   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
826   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
827   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
828   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
829   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
830   bs  = p;
831   bs2 = bs * bs;
832 
833   if (!m) PetscFunctionReturn(0);
834 
835   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
836   idiag = kaij->ibdiag;
837   diag  = a->diag;
838 
839   if (!kaij->sor.setup) {
840     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
841     kaij->sor.setup = PETSC_TRUE;
842   }
843   y    = kaij->sor.y;
844   w    = kaij->sor.w;
845   work = kaij->sor.work;
846   t    = kaij->sor.t;
847   arr  = kaij->sor.arr;
848 
849   PetscCall(VecGetArray(xx, &x));
850   PetscCall(VecGetArrayRead(bb, &b));
851 
852   if (flag & SOR_ZERO_INITIAL_GUESS) {
853     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
854       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
855       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
856       i2 = bs;
857       idiag += bs2;
858       for (i = 1; i < m; i++) {
859         v  = aa + ai[i];
860         vi = aj + ai[i];
861         nz = diag[i] - ai[i];
862 
863         if (T) { /* b - T (Arow * x) */
864           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
865           for (j = 0; j < nz; j++) {
866             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
867           }
868           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
869           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
870         } else if (kaij->isTI) {
871           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
872           for (j = 0; j < nz; j++) {
873             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
874           }
875         } else {
876           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
877         }
878 
879         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
880         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
881 
882         idiag += bs2;
883         i2 += bs;
884       }
885       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
886       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
887       xb = t;
888     } else xb = b;
889     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
890       idiag = kaij->ibdiag + bs2 * (m - 1);
891       i2    = bs * (m - 1);
892       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
893       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
894       i2 -= bs;
895       idiag -= bs2;
896       for (i = m - 2; i >= 0; i--) {
897         v  = aa + diag[i] + 1;
898         vi = aj + diag[i] + 1;
899         nz = ai[i + 1] - diag[i] - 1;
900 
901         if (T) { /* FIXME: This branch untested */
902           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
903           /* copy all rows of x that are needed into contiguous space */
904           workt = work;
905           for (j = 0; j < nz; j++) {
906             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
907             workt += bs;
908           }
909           arrt = arr;
910           for (j = 0; j < nz; j++) {
911             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
912             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
913             arrt += bs2;
914           }
915           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
916         } else if (kaij->isTI) {
917           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
918           for (j = 0; j < nz; j++) {
919             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
920           }
921         }
922 
923         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
924         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
925 
926         idiag -= bs2;
927         i2 -= bs;
928       }
929       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
930     }
931     its--;
932   }
933   while (its--) { /* FIXME: This branch not updated */
934     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
935       i2    = 0;
936       idiag = kaij->ibdiag;
937       for (i = 0; i < m; i++) {
938         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
939 
940         v     = aa + ai[i];
941         vi    = aj + ai[i];
942         nz    = diag[i] - ai[i];
943         workt = work;
944         for (j = 0; j < nz; j++) {
945           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
946           workt += bs;
947         }
948         arrt = arr;
949         if (T) {
950           for (j = 0; j < nz; j++) {
951             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
952             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
953             arrt += bs2;
954           }
955           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
956         } else if (kaij->isTI) {
957           for (j = 0; j < nz; j++) {
958             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
959             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
960             arrt += bs2;
961           }
962           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
963         }
964         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
965 
966         v     = aa + diag[i] + 1;
967         vi    = aj + diag[i] + 1;
968         nz    = ai[i + 1] - diag[i] - 1;
969         workt = work;
970         for (j = 0; j < nz; j++) {
971           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
972           workt += bs;
973         }
974         arrt = arr;
975         if (T) {
976           for (j = 0; j < nz; j++) {
977             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
978             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
979             arrt += bs2;
980           }
981           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
982         } else if (kaij->isTI) {
983           for (j = 0; j < nz; j++) {
984             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
985             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
986             arrt += bs2;
987           }
988           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
989         }
990 
991         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
992         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
993 
994         idiag += bs2;
995         i2 += bs;
996       }
997       xb = t;
998     } else xb = b;
999     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1000       idiag = kaij->ibdiag + bs2 * (m - 1);
1001       i2    = bs * (m - 1);
1002       if (xb == b) {
1003         for (i = m - 1; i >= 0; i--) {
1004           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1005 
1006           v     = aa + ai[i];
1007           vi    = aj + ai[i];
1008           nz    = diag[i] - ai[i];
1009           workt = work;
1010           for (j = 0; j < nz; j++) {
1011             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1012             workt += bs;
1013           }
1014           arrt = arr;
1015           if (T) {
1016             for (j = 0; j < nz; j++) {
1017               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1018               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1019               arrt += bs2;
1020             }
1021             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1022           } else if (kaij->isTI) {
1023             for (j = 0; j < nz; j++) {
1024               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1025               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1026               arrt += bs2;
1027             }
1028             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1029           }
1030 
1031           v     = aa + diag[i] + 1;
1032           vi    = aj + diag[i] + 1;
1033           nz    = ai[i + 1] - diag[i] - 1;
1034           workt = work;
1035           for (j = 0; j < nz; j++) {
1036             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1037             workt += bs;
1038           }
1039           arrt = arr;
1040           if (T) {
1041             for (j = 0; j < nz; j++) {
1042               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1043               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1044               arrt += bs2;
1045             }
1046             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1047           } else if (kaij->isTI) {
1048             for (j = 0; j < nz; j++) {
1049               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1050               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1051               arrt += bs2;
1052             }
1053             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1054           }
1055 
1056           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1057           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1058         }
1059       } else {
1060         for (i = m - 1; i >= 0; i--) {
1061           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1062           v     = aa + diag[i] + 1;
1063           vi    = aj + diag[i] + 1;
1064           nz    = ai[i + 1] - diag[i] - 1;
1065           workt = work;
1066           for (j = 0; j < nz; j++) {
1067             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1068             workt += bs;
1069           }
1070           arrt = arr;
1071           if (T) {
1072             for (j = 0; j < nz; j++) {
1073               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1074               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1075               arrt += bs2;
1076             }
1077             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1078           } else if (kaij->isTI) {
1079             for (j = 0; j < nz; j++) {
1080               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1081               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1082               arrt += bs2;
1083             }
1084             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1085           }
1086           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1087           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1088         }
1089       }
1090       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1091     }
1092   }
1093 
1094   PetscCall(VecRestoreArray(xx, &x));
1095   PetscCall(VecRestoreArrayRead(bb, &b));
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /*===================================================================================*/
1100 
1101 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1102   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1103 
1104   PetscFunctionBegin;
1105   if (!yy) {
1106     PetscCall(VecSet(zz, 0.0));
1107   } else {
1108     PetscCall(VecCopy(yy, zz));
1109   }
1110   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1111   /* start the scatter */
1112   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1113   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1114   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1115   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) {
1120   PetscFunctionBegin;
1121   PetscCall(MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy));
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) {
1126   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1127 
1128   PetscFunctionBegin;
1129   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1130   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 /* ----------------------------------------------------------------*/
1135 
1136 PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) {
1137   Mat_SeqKAIJ   *b    = (Mat_SeqKAIJ *)A->data;
1138   PetscErrorCode diag = PETSC_FALSE;
1139   PetscInt       nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1140   PetscScalar   *vaij, *v, *S = b->S, *T = b->T;
1141 
1142   PetscFunctionBegin;
1143   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1144   b->getrowactive = PETSC_TRUE;
1145   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1146 
1147   if ((!S) && (!T) && (!b->isTI)) {
1148     if (ncols) *ncols = 0;
1149     if (cols) *cols = NULL;
1150     if (values) *values = NULL;
1151     PetscFunctionReturn(0);
1152   }
1153 
1154   if (T || b->isTI) {
1155     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1156     c = nzaij;
1157     for (i = 0; i < nzaij; i++) {
1158       /* check if this row contains a diagonal entry */
1159       if (colsaij[i] == r) {
1160         diag = PETSC_TRUE;
1161         c    = i;
1162       }
1163     }
1164   } else nzaij = c = 0;
1165 
1166   /* calculate size of row */
1167   nz = 0;
1168   if (S) nz += q;
1169   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1170 
1171   if (cols || values) {
1172     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1173     for (i = 0; i < q; i++) {
1174       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1175       v[i] = 0.0;
1176     }
1177     if (b->isTI) {
1178       for (i = 0; i < nzaij; i++) {
1179         for (j = 0; j < q; j++) {
1180           idx[i * q + j] = colsaij[i] * q + j;
1181           v[i * q + j]   = (j == s ? vaij[i] : 0);
1182         }
1183       }
1184     } else if (T) {
1185       for (i = 0; i < nzaij; i++) {
1186         for (j = 0; j < q; j++) {
1187           idx[i * q + j] = colsaij[i] * q + j;
1188           v[i * q + j]   = vaij[i] * T[s + j * p];
1189         }
1190       }
1191     }
1192     if (S) {
1193       for (j = 0; j < q; j++) {
1194         idx[c * q + j] = r * q + j;
1195         v[c * q + j] += S[s + j * p];
1196       }
1197     }
1198   }
1199 
1200   if (ncols) *ncols = nz;
1201   if (cols) *cols = idx;
1202   if (values) *values = v;
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1207   PetscFunctionBegin;
1208   if (nz) *nz = 0;
1209   PetscCall(PetscFree2(*idx, *v));
1210   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) {
1215   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1216   Mat            AIJ  = b->A;
1217   PetscBool      diag = PETSC_FALSE;
1218   Mat            MatAIJ, MatOAIJ;
1219   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1220   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1221   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;
1222 
1223   PetscFunctionBegin;
1224   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1225   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1226   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1227   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1228   b->getrowactive = PETSC_TRUE;
1229   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1230   lrow = row - rstart;
1231 
1232   if ((!S) && (!T) && (!b->isTI)) {
1233     if (ncols) *ncols = 0;
1234     if (cols) *cols = NULL;
1235     if (values) *values = NULL;
1236     PetscFunctionReturn(0);
1237   }
1238 
1239   r = lrow / p;
1240   s = lrow % p;
1241 
1242   if (T || b->isTI) {
1243     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1244     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1245     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1246 
1247     c = ncolsaij + ncolsoaij;
1248     for (i = 0; i < ncolsaij; i++) {
1249       /* check if this row contains a diagonal entry */
1250       if (colsaij[i] == r) {
1251         diag = PETSC_TRUE;
1252         c    = i;
1253       }
1254     }
1255   } else c = 0;
1256 
1257   /* calculate size of row */
1258   nz = 0;
1259   if (S) nz += q;
1260   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1261 
1262   if (cols || values) {
1263     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1264     for (i = 0; i < q; i++) {
1265       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1266       v[i] = 0.0;
1267     }
1268     if (b->isTI) {
1269       for (i = 0; i < ncolsaij; i++) {
1270         for (j = 0; j < q; j++) {
1271           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1272           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1273         }
1274       }
1275       for (i = 0; i < ncolsoaij; i++) {
1276         for (j = 0; j < q; j++) {
1277           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1278           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1279         }
1280       }
1281     } else if (T) {
1282       for (i = 0; i < ncolsaij; i++) {
1283         for (j = 0; j < q; j++) {
1284           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1285           v[i * q + j]   = vals[i] * T[s + j * p];
1286         }
1287       }
1288       for (i = 0; i < ncolsoaij; i++) {
1289         for (j = 0; j < q; j++) {
1290           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1291           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1292         }
1293       }
1294     }
1295     if (S) {
1296       for (j = 0; j < q; j++) {
1297         idx[c * q + j] = (r + rstart / p) * q + j;
1298         v[c * q + j] += S[s + j * p];
1299       }
1300     }
1301   }
1302 
1303   if (ncols) *ncols = nz;
1304   if (cols) *cols = idx;
1305   if (values) *values = v;
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1310   PetscFunctionBegin;
1311   PetscCall(PetscFree2(*idx, *v));
1312   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
1317   Mat A;
1318 
1319   PetscFunctionBegin;
1320   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1321   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1322   PetscCall(MatDestroy(&A));
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* ---------------------------------------------------------------------------------- */
1327 /*@C
1328   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1329 
1330     [I \otimes S + A \otimes T]
1331 
1332   where
1333     S is a dense (p \times q) matrix
1334     T is a dense (p \times q) matrix
1335     A is an AIJ  (n \times n) matrix
1336     I is the identity matrix
1337   The resulting matrix is (np \times nq)
1338 
1339   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1340 
1341   Collective
1342 
1343   Input Parameters:
1344 + A - the AIJ matrix
1345 . p - number of rows in S and T
1346 . q - number of columns in S and T
. S - the S matrix (can be NULL), stored as a PetscScalar array (column-major)
- T - the T matrix (can be NULL), stored as a PetscScalar array (column-major)
1349 
1350   Output Parameter:
1351 . kaij - the new KAIJ matrix
1352 
1353   Notes:
1354   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1355   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1356 
1357   Developer Notes:
1358   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1359   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1360   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1361   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1362 
1363   Level: advanced
1364 
1365 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1366 @*/
PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) {
  PetscFunctionBegin;
  /* Create an empty matrix on A's communicator and give it the KAIJ type;
     MatSetType() dispatches to MatCreate_KAIJ(), which installs the sequential
     or parallel implementation depending on the communicator size. */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
  PetscCall(MatSetType(*kaij, MATKAIJ));
  /* Attach the AIJ matrix describing the blockwise action (increases A's reference
     count, per the manual page above). */
  PetscCall(MatKAIJSetAIJ(*kaij, A));
  /* Record the dense p x q blocks S and T (either may be NULL), stored column-major. */
  PetscCall(MatKAIJSetS(*kaij, p, q, S));
  PetscCall(MatKAIJSetT(*kaij, p, q, T));
  /* Finish setup now that A, S, and T are all known. */
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(0);
}
1377 
1378 /*MC
1379   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1380     [I \otimes S + A \otimes T],
1381   where
1382     S is a dense (p \times q) matrix,
1383     T is a dense (p \times q) matrix,
1384     A is an AIJ  (n \times n) matrix,
1385     and I is the identity matrix.
1386   The resulting matrix is (np \times nq).
1387 
1388   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1389 
1390   Notes:
1391   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1392   where x and b are column vectors containing the row-major representations of X and B.
1393 
1394   Level: advanced
1395 
1396 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1397 M*/
1398 
/* Type constructor registered for MATKAIJ: allocates the implementation struct,
   clears the ops table, and installs the sequential or parallel KAIJ operations
   depending on the communicator size, renaming the type to MATSEQKAIJ or
   MATMPIKAIJ accordingly. */
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) {
  Mat_MPIKAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  /* A Mat_MPIKAIJ is allocated in both cases; the sequential code presumably uses
     only the fields it shares with Mat_SeqKAIJ -- TODO confirm against kaij.h */
  PetscCall(PetscNewLog(A, &b));
  A->data = (void *)b;

  /* Start from a clean ops table: any operation not assigned below is unsupported. */
  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  b->w = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single-process: sequential implementations, SOR included */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
  } else {
    /* multi-process: parallel implementations (no SOR registered) */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
  }
  /* Operations shared by both variants. */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(0);
}
1437