xref: /petsc/src/mat/impls/kaij/kaij.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
32 
33    Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
34 
35    Input Parameter:
36 .  A - the `MATKAIJ` matrix
37 
38    Output Parameter:
39 .  B - the `MATAIJ` matrix
40 
41    Level: advanced
42 
43    Note:
44    The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
45 
46 .seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
47 @*/
48 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) {
49   PetscBool ismpikaij, isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(0);
64 }
65 
66 /*@C
67    MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix
68 
69    Not Collective; the entire S is stored and returned independently on all processes.
70 
71    Input Parameter:
72 .  A - the `MATKAIJ` matrix
73 
74    Output Parameters:
75 +  m - the number of rows in S
76 .  n - the number of columns in S
77 -  S - the S matrix, in form of a scalar array in column-major format
78 
79    Note:
80    All output parameters are optional (pass NULL if not desired)
81 
82    Level: advanced
83 
84 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) {
87   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
88   PetscFunctionBegin;
89   if (m) *m = b->p;
90   if (n) *n = b->q;
91   if (S) *S = b->S;
92   PetscFunctionReturn(0);
93 }
94 
95 /*@C
96    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix
97 
98    Not Collective; the entire S is stored and returned independently on all processes.
99 
100    Input Parameter:
101 .  A - the `MATKAIJ` matrix
102 
103    Output Parameters:
104 +  m - the number of rows in S
105 .  n - the number of columns in S
106 -  S - the S matrix, in form of a scalar array in column-major format
107 
108    Note:
109    All output parameters are optional (pass NULL if not desired)
110 
111    Level: advanced
112 
113 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) {
116   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
117   PetscFunctionBegin;
118   if (m) *m = b->p;
119   if (n) *n = b->q;
120   if (S) *S = b->S;
121   PetscFunctionReturn(0);
122 }
123 
124 /*@C
125   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
126 
127   Not collective
128 
129   Input Parameter:
130 . A - the `MATKAIJ` matrix
131 
132   Output Parameter:
133 . S - location of pointer to array obtained with `MatKAIJGetS()`
134 
135   Note:
136   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 
141 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
142 @*/
143 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscCall(PetscObjectStateIncrease((PetscObject)A));
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the `MATKAIJ` matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with `MatKAIJGetS()`
160 
161   Note:
162   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
163   If NULL is passed, it will not attempt to zero the array pointer.
164 
165   Level: advanced
166 
167 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) {
170   PetscFunctionBegin;
171   if (S) *S = NULL;
172   PetscFunctionReturn(0);
173 }
174 
175 /*@C
176    MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix
177 
178    Not Collective; the entire T is stored and returned independently on all processes
179 
180    Input Parameter:
181 .  A - the `MATKAIJ` matrix
182 
183    Output Parameters:
184 +  m - the number of rows in T
185 .  n - the number of columns in T
186 -  T - the T matrix, in form of a scalar array in column-major format
187 
188    Note:
189    All output parameters are optional (pass NULL if not desired)
190 
191    Level: advanced
192 
193 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) {
196   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
197   PetscFunctionBegin;
198   if (m) *m = b->p;
199   if (n) *n = b->q;
200   if (T) *T = b->T;
201   PetscFunctionReturn(0);
202 }
203 
204 /*@C
205    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix
206 
207    Not Collective; the entire T is stored and returned independently on all processes
208 
209    Input Parameter:
210 .  A - the `MATKAIJ` matrix
211 
212    Output Parameters:
213 +  m - the number of rows in T
214 .  n - the number of columns in T
215 -  T - the T matrix, in form of a scalar array in column-major format
216 
217    Note: All output parameters are optional (pass NULL if not desired)
218 
219    Level: advanced
220 
221 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
222 @*/
223 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the `MATKAIJ` matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with `MatKAIJGetS()`
242 
243   Note:
244   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
245   If NULL is passed, it will not attempt to zero the array pointer.
246 
247   Level: advanced
248 
249 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) {
252   PetscFunctionBegin;
253   if (T) *T = NULL;
254   PetscCall(PetscObjectStateIncrease((PetscObject)A));
255   PetscFunctionReturn(0);
256 }
257 
258 /*@C
259   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
260 
261   Not collective
262 
263   Input Parameter:
264 . A - the `MATKAIJ` matrix
265 
266   Output Parameter:
267 . T - location of pointer to array obtained with `MatKAIJGetS()`
268 
269   Note:
270   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
271   If NULL is passed, it will not attempt to zero the array pointer.
272 
273   Level: advanced
274 
275 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
276 @*/
277 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) {
278   PetscFunctionBegin;
279   if (T) *T = NULL;
280   PetscFunctionReturn(0);
281 }
282 
283 /*@
284    MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
285 
286    Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
287 
288    Input Parameters:
289 +  A - the `MATKAIJ` matrix
290 -  B - the `MATAIJ` matrix
291 
292    Notes:
293    This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
294 
295    Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
296 
297    Level: advanced
298 
299 .seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
300 @*/
301 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) {
302   PetscMPIInt size;
303   PetscBool   flg;
304 
305   PetscFunctionBegin;
306   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
307   if (size == 1) {
308     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
309     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
311     a->AIJ         = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
314     a->A           = B;
315   }
316   PetscCall(PetscObjectReference((PetscObject)B));
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the `MATKAIJ` matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes:
332    The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix.
333 
334    The S matrix is copied, so the user can destroy this array.
335 
336    Level: advanced
337 
338 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
339 @*/
340 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) {
341   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
342 
343   PetscFunctionBegin;
344   PetscCall(PetscFree(a->S));
345   if (S) {
346     PetscCall(PetscMalloc1(p * q, &a->S));
347     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
348   } else a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
357 
358    Logically Collective.
359 
360    Input Parameter:
361 .  A - the `MATKAIJ` matrix
362 
363   Output Parameter:
364 .  identity - the Boolean value
365 
366    Level: Advanced
367 
368 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
369 @*/
370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) {
371   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
372   PetscInt     i, j;
373 
374   PetscFunctionBegin;
375   if (a->p != a->q) {
376     *identity = PETSC_FALSE;
377     PetscFunctionReturn(0);
378   } else *identity = PETSC_TRUE;
379   if (!a->isTI || a->S) {
380     for (i = 0; i < a->p && *identity; i++) {
381       for (j = 0; j < a->p && *identity; j++) {
382         if (i != j) {
383           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
384           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385         } else {
386           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
387           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388         }
389       }
390     }
391   }
392   PetscFunctionReturn(0);
393 }
394 
395 /*@C
396    MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix
397 
398    Logically Collective; the entire T is stored independently on all processes.
399 
400    Input Parameters:
401 +  A - the `MATKAIJ` matrix
402 .  p - the number of rows in S
403 .  q - the number of columns in S
404 -  T - the T matrix, in form of a scalar array in column-major format
405 
406    Notes:
407    The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix.
408 
409    The T matrix is copied, so the user can destroy this array.
410 
411    Level: Advanced
412 
413 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
414 @*/
415 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) {
416   PetscInt     i, j;
417   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
418   PetscBool    isTI = PETSC_FALSE;
419 
420   PetscFunctionBegin;
421   /* check if T is an identity matrix */
422   if (T && (p == q)) {
423     isTI = PETSC_TRUE;
424     for (i = 0; i < p; i++) {
425       for (j = 0; j < q; j++) {
426         if (i == j) {
427           /* diagonal term must be 1 */
428           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
429         } else {
430           /* off-diagonal term must be 0 */
431           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
432         }
433       }
434     }
435   }
436   a->isTI = isTI;
437 
438   PetscCall(PetscFree(a->T));
439   if (T && (!isTI)) {
440     PetscCall(PetscMalloc1(p * q, &a->T));
441     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
442   } else a->T = NULL;
443 
444   a->p = p;
445   a->q = q;
446   PetscFunctionReturn(0);
447 }
448 
449 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) {
450   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
451 
452   PetscFunctionBegin;
453   PetscCall(MatDestroy(&b->AIJ));
454   PetscCall(PetscFree(b->S));
455   PetscCall(PetscFree(b->T));
456   PetscCall(PetscFree(b->ibdiag));
457   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
458   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
459   PetscCall(PetscFree(A->data));
460   PetscFunctionReturn(0);
461 }
462 
463 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) {
464   Mat_MPIKAIJ     *a;
465   Mat_MPIAIJ      *mpiaij;
466   PetscScalar     *T;
467   PetscInt         i, j;
468   PetscObjectState state;
469 
470   PetscFunctionBegin;
471   a      = (Mat_MPIKAIJ *)A->data;
472   mpiaij = (Mat_MPIAIJ *)a->A->data;
473 
474   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
475   if (state == a->state) {
476     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
477     PetscFunctionReturn(0);
478   } else {
479     PetscCall(MatDestroy(&a->AIJ));
480     PetscCall(MatDestroy(&a->OAIJ));
481     if (a->isTI) {
482       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
483        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
484        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
485        * to pass in. */
486       PetscCall(PetscMalloc1(a->p * a->q, &T));
487       for (i = 0; i < a->p; i++) {
488         for (j = 0; j < a->q; j++) {
489           if (i == j) T[i + j * a->p] = 1.0;
490           else T[i + j * a->p] = 0.0;
491         }
492       }
493     } else T = a->T;
494     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
495     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
496     if (a->isTI) PetscCall(PetscFree(T));
497     a->state = state;
498   }
499 
500   PetscFunctionReturn(0);
501 }
502 
503 PetscErrorCode MatSetUp_KAIJ(Mat A) {
504   PetscInt     n;
505   PetscMPIInt  size;
506   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
507 
508   PetscFunctionBegin;
509   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
510   if (size == 1) {
511     PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
512     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
513     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
514     PetscCall(PetscLayoutSetUp(A->rmap));
515     PetscCall(PetscLayoutSetUp(A->cmap));
516   } else {
517     Mat_MPIKAIJ *a;
518     Mat_MPIAIJ  *mpiaij;
519     IS           from, to;
520     Vec          gvec;
521 
522     a      = (Mat_MPIKAIJ *)A->data;
523     mpiaij = (Mat_MPIAIJ *)a->A->data;
524     PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
525     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
526     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
527     PetscCall(PetscLayoutSetUp(A->rmap));
528     PetscCall(PetscLayoutSetUp(A->cmap));
529 
530     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
531 
532     PetscCall(VecGetSize(mpiaij->lvec, &n));
533     PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
534     PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
535     PetscCall(VecSetBlockSize(a->w, a->q));
536     PetscCall(VecSetType(a->w, VECSEQ));
537 
538     /* create two temporary Index sets for build scatter gather */
539     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
540     PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));
541 
542     /* create temporary global vector to generate scatter context */
543     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));
544 
545     /* generate the scatter context */
546     PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));
547 
548     PetscCall(ISDestroy(&from));
549     PetscCall(ISDestroy(&to));
550     PetscCall(VecDestroy(&gvec));
551   }
552 
553   A->assembled = PETSC_TRUE;
554   PetscFunctionReturn(0);
555 }
556 
557 PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) {
558   PetscViewerFormat format;
559   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
560   Mat               B;
561   PetscInt          i;
562   PetscBool         ismpikaij;
563 
564   PetscFunctionBegin;
565   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
566   PetscCall(PetscViewerGetFormat(viewer, &format));
567   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
568     PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));
569 
570     /* Print appropriate details for S. */
571     if (!a->S) {
572       PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
573     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
574       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
575       for (i = 0; i < (a->p * a->q); i++) {
576 #if defined(PETSC_USE_COMPLEX)
577         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
578 #else
579         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
580 #endif
581       }
582       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
583     }
584 
585     /* Print appropriate details for T. */
586     if (a->isTI) {
587       PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
588     } else if (!a->T) {
589       PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
590     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
591       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
592       for (i = 0; i < (a->p * a->q); i++) {
593 #if defined(PETSC_USE_COMPLEX)
594         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
595 #else
596         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
597 #endif
598       }
599       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
600     }
601 
602     /* Now print details for the AIJ matrix, using the AIJ viewer. */
603     PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
604     if (ismpikaij) {
605       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
606       PetscCall(MatView(b->A, viewer));
607     } else {
608       PetscCall(MatView(a->AIJ, viewer));
609     }
610 
611   } else {
612     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
613     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
614     PetscCall(MatView(B, viewer));
615     PetscCall(MatDestroy(&B));
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) {
621   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
622 
623   PetscFunctionBegin;
624   PetscCall(MatDestroy(&b->AIJ));
625   PetscCall(MatDestroy(&b->OAIJ));
626   PetscCall(MatDestroy(&b->A));
627   PetscCall(VecScatterDestroy(&b->ctx));
628   PetscCall(VecDestroy(&b->w));
629   PetscCall(PetscFree(b->S));
630   PetscCall(PetscFree(b->T));
631   PetscCall(PetscFree(b->ibdiag));
632   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
633   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
634   PetscCall(PetscFree(A->data));
635   PetscFunctionReturn(0);
636 }
637 
638 /* --------------------------------------------------------------------------------------*/
639 
640 /* zz = yy + Axx */
641 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
642   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
643   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
644   const PetscScalar *s = b->S, *t = b->T;
645   const PetscScalar *x, *v, *bx;
646   PetscScalar       *y, *sums;
647   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
648   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;
649 
650   PetscFunctionBegin;
651   if (!yy) {
652     PetscCall(VecSet(zz, 0.0));
653   } else {
654     PetscCall(VecCopy(yy, zz));
655   }
656   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
657 
658   PetscCall(VecGetArrayRead(xx, &x));
659   PetscCall(VecGetArray(zz, &y));
660   idx = a->j;
661   v   = a->a;
662   ii  = a->i;
663 
664   if (b->isTI) {
665     for (i = 0; i < m; i++) {
666       jrow = ii[i];
667       n    = ii[i + 1] - jrow;
668       sums = y + p * i;
669       for (j = 0; j < n; j++) {
670         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
671       }
672     }
673     PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
674   } else if (t) {
675     for (i = 0; i < m; i++) {
676       jrow = ii[i];
677       n    = ii[i + 1] - jrow;
678       sums = y + p * i;
679       for (j = 0; j < n; j++) {
680         for (k = 0; k < p; k++) {
681           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
682         }
683       }
684     }
685     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
686      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
687      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
688      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
689     PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
690   }
691   if (s) {
692     for (i = 0; i < m; i++) {
693       sums = y + p * i;
694       bx   = x + q * i;
695       if (i < b->AIJ->cmap->n) {
696         for (j = 0; j < q; j++) {
697           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
698         }
699       }
700     }
701     PetscCall(PetscLogFlops(2.0 * m * p * q));
702   }
703 
704   PetscCall(VecRestoreArrayRead(xx, &x));
705   PetscCall(VecRestoreArray(zz, &y));
706   PetscFunctionReturn(0);
707 }
708 
709 PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) {
710   PetscFunctionBegin;
711   PetscCall(MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy));
712   PetscFunctionReturn(0);
713 }
714 
715 #include <petsc/private/kernels/blockinvert.h>
716 
717 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) {
718   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
719   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
720   const PetscScalar *S = b->S;
721   const PetscScalar *T = b->T;
722   const PetscScalar *v = a->a;
723   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
724   PetscInt           i, j, *v_pivots, dof, dof2;
725   PetscScalar       *diag, aval, *v_work;
726 
727   PetscFunctionBegin;
728   PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
729   PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");
730 
731   dof  = p;
732   dof2 = dof * dof;
733 
734   if (b->ibdiagvalid) {
735     if (values) *values = b->ibdiag;
736     PetscFunctionReturn(0);
737   }
738   if (!b->ibdiag) { PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); }
739   if (values) *values = b->ibdiag;
740   diag = b->ibdiag;
741 
742   PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
743   for (i = 0; i < m; i++) {
744     if (S) {
745       PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
746     } else {
747       PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
748     }
749     if (b->isTI) {
750       aval = 0;
751       for (j = ii[i]; j < ii[i + 1]; j++)
752         if (idx[j] == i) aval = v[j];
753       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
754     } else if (T) {
755       aval = 0;
756       for (j = ii[i]; j < ii[i + 1]; j++)
757         if (idx[j] == i) aval = v[j];
758       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
759     }
760     PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
761     diag += dof2;
762   }
763   PetscCall(PetscFree2(v_work, v_pivots));
764 
765   b->ibdiagvalid = PETSC_TRUE;
766   PetscFunctionReturn(0);
767 }
768 
769 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) {
770   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
771 
772   PetscFunctionBegin;
773   *B = kaij->AIJ;
774   PetscFunctionReturn(0);
775 }
776 
777 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
778   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
779   Mat            AIJ, OAIJ, B;
780   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
781   const PetscInt p = a->p, q = a->q;
782   PetscBool      ismpikaij, missing;
783 
784   PetscFunctionBegin;
785   if (reuse != MAT_REUSE_MATRIX) {
786     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
787     if (ismpikaij) {
788       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
789       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
790       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
791     } else {
792       AIJ  = a->AIJ;
793       OAIJ = NULL;
794     }
795     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
796     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
797     PetscCall(MatSetType(B, MATAIJ));
798     PetscCall(MatGetSize(AIJ, &m, NULL));
799     PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
800     if (!missing || !a->S) d = m;
801     PetscCall(PetscMalloc1(m * p, &d_nnz));
802     for (i = 0; i < m; ++i) {
803       PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
804       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
805       PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
806     }
807     if (OAIJ) {
808       PetscCall(PetscMalloc1(m * p, &o_nnz));
809       for (i = 0; i < m; ++i) {
810         PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
811         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
812         PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
813       }
814       PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
815     } else {
816       PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
817     }
818     PetscCall(PetscFree(d_nnz));
819     PetscCall(PetscFree(o_nnz));
820   } else B = *newmat;
821   PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
822   if (reuse == MAT_INPLACE_MATRIX) {
823     PetscCall(MatHeaderReplace(A, &B));
824   } else *newmat = B;
825   PetscFunctionReturn(0);
826 }
827 
828 PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
829   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
830   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
831   const PetscScalar *aa = a->a, *T = kaij->T, *v;
832   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
833   const PetscScalar *b, *xb, *idiag;
834   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
835   PetscInt           i, j, k, i2, bs, bs2, nz;
836 
837   PetscFunctionBegin;
838   its = its * lits;
839   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
840   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
841   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
842   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
843   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
844   bs  = p;
845   bs2 = bs * bs;
846 
847   if (!m) PetscFunctionReturn(0);
848 
849   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
850   idiag = kaij->ibdiag;
851   diag  = a->diag;
852 
853   if (!kaij->sor.setup) {
854     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
855     kaij->sor.setup = PETSC_TRUE;
856   }
857   y    = kaij->sor.y;
858   w    = kaij->sor.w;
859   work = kaij->sor.work;
860   t    = kaij->sor.t;
861   arr  = kaij->sor.arr;
862 
863   PetscCall(VecGetArray(xx, &x));
864   PetscCall(VecGetArrayRead(bb, &b));
865 
866   if (flag & SOR_ZERO_INITIAL_GUESS) {
867     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
868       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
869       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
870       i2 = bs;
871       idiag += bs2;
872       for (i = 1; i < m; i++) {
873         v  = aa + ai[i];
874         vi = aj + ai[i];
875         nz = diag[i] - ai[i];
876 
877         if (T) { /* b - T (Arow * x) */
878           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
879           for (j = 0; j < nz; j++) {
880             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
881           }
882           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
883           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
884         } else if (kaij->isTI) {
885           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
886           for (j = 0; j < nz; j++) {
887             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
888           }
889         } else {
890           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
891         }
892 
893         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
894         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
895 
896         idiag += bs2;
897         i2 += bs;
898       }
899       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
900       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
901       xb = t;
902     } else xb = b;
903     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
904       idiag = kaij->ibdiag + bs2 * (m - 1);
905       i2    = bs * (m - 1);
906       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
907       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
908       i2 -= bs;
909       idiag -= bs2;
910       for (i = m - 2; i >= 0; i--) {
911         v  = aa + diag[i] + 1;
912         vi = aj + diag[i] + 1;
913         nz = ai[i + 1] - diag[i] - 1;
914 
915         if (T) { /* FIXME: This branch untested */
916           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
917           /* copy all rows of x that are needed into contiguous space */
918           workt = work;
919           for (j = 0; j < nz; j++) {
920             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
921             workt += bs;
922           }
923           arrt = arr;
924           for (j = 0; j < nz; j++) {
925             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
926             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
927             arrt += bs2;
928           }
929           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
930         } else if (kaij->isTI) {
931           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
932           for (j = 0; j < nz; j++) {
933             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
934           }
935         }
936 
937         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
938         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
939 
940         idiag -= bs2;
941         i2 -= bs;
942       }
943       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
944     }
945     its--;
946   }
947   while (its--) { /* FIXME: This branch not updated */
948     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
949       i2    = 0;
950       idiag = kaij->ibdiag;
951       for (i = 0; i < m; i++) {
952         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
953 
954         v     = aa + ai[i];
955         vi    = aj + ai[i];
956         nz    = diag[i] - ai[i];
957         workt = work;
958         for (j = 0; j < nz; j++) {
959           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
960           workt += bs;
961         }
962         arrt = arr;
963         if (T) {
964           for (j = 0; j < nz; j++) {
965             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
966             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
967             arrt += bs2;
968           }
969           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
970         } else if (kaij->isTI) {
971           for (j = 0; j < nz; j++) {
972             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
973             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
974             arrt += bs2;
975           }
976           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
977         }
978         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
979 
980         v     = aa + diag[i] + 1;
981         vi    = aj + diag[i] + 1;
982         nz    = ai[i + 1] - diag[i] - 1;
983         workt = work;
984         for (j = 0; j < nz; j++) {
985           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
986           workt += bs;
987         }
988         arrt = arr;
989         if (T) {
990           for (j = 0; j < nz; j++) {
991             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
992             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
993             arrt += bs2;
994           }
995           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
996         } else if (kaij->isTI) {
997           for (j = 0; j < nz; j++) {
998             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
999             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1000             arrt += bs2;
1001           }
1002           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1003         }
1004 
1005         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1006         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1007 
1008         idiag += bs2;
1009         i2 += bs;
1010       }
1011       xb = t;
1012     } else xb = b;
1013     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1014       idiag = kaij->ibdiag + bs2 * (m - 1);
1015       i2    = bs * (m - 1);
1016       if (xb == b) {
1017         for (i = m - 1; i >= 0; i--) {
1018           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1019 
1020           v     = aa + ai[i];
1021           vi    = aj + ai[i];
1022           nz    = diag[i] - ai[i];
1023           workt = work;
1024           for (j = 0; j < nz; j++) {
1025             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1026             workt += bs;
1027           }
1028           arrt = arr;
1029           if (T) {
1030             for (j = 0; j < nz; j++) {
1031               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1032               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1033               arrt += bs2;
1034             }
1035             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1036           } else if (kaij->isTI) {
1037             for (j = 0; j < nz; j++) {
1038               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1039               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1040               arrt += bs2;
1041             }
1042             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1043           }
1044 
1045           v     = aa + diag[i] + 1;
1046           vi    = aj + diag[i] + 1;
1047           nz    = ai[i + 1] - diag[i] - 1;
1048           workt = work;
1049           for (j = 0; j < nz; j++) {
1050             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1051             workt += bs;
1052           }
1053           arrt = arr;
1054           if (T) {
1055             for (j = 0; j < nz; j++) {
1056               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1057               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1058               arrt += bs2;
1059             }
1060             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1061           } else if (kaij->isTI) {
1062             for (j = 0; j < nz; j++) {
1063               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1064               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1065               arrt += bs2;
1066             }
1067             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1068           }
1069 
1070           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1071           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1072         }
1073       } else {
1074         for (i = m - 1; i >= 0; i--) {
1075           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1076           v     = aa + diag[i] + 1;
1077           vi    = aj + diag[i] + 1;
1078           nz    = ai[i + 1] - diag[i] - 1;
1079           workt = work;
1080           for (j = 0; j < nz; j++) {
1081             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1082             workt += bs;
1083           }
1084           arrt = arr;
1085           if (T) {
1086             for (j = 0; j < nz; j++) {
1087               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1088               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1089               arrt += bs2;
1090             }
1091             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1092           } else if (kaij->isTI) {
1093             for (j = 0; j < nz; j++) {
1094               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1095               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1096               arrt += bs2;
1097             }
1098             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1099           }
1100           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1101           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1102         }
1103       }
1104       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1105     }
1106   }
1107 
1108   PetscCall(VecRestoreArray(xx, &x));
1109   PetscCall(VecRestoreArrayRead(bb, &b));
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 /*===================================================================================*/
1114 
1115 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1116   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1117 
1118   PetscFunctionBegin;
1119   if (!yy) {
1120     PetscCall(VecSet(zz, 0.0));
1121   } else {
1122     PetscCall(VecCopy(yy, zz));
1123   }
1124   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1125   /* start the scatter */
1126   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1127   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1128   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1129   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) {
1134   PetscFunctionBegin;
1135   PetscCall(MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy));
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) {
1140   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1141 
1142   PetscFunctionBegin;
1143   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1144   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 /* ----------------------------------------------------------------*/
1149 
1150 PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) {
1151   Mat_SeqKAIJ   *b    = (Mat_SeqKAIJ *)A->data;
1152   PetscErrorCode diag = PETSC_FALSE;
1153   PetscInt       nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1154   PetscScalar   *vaij, *v, *S = b->S, *T = b->T;
1155 
1156   PetscFunctionBegin;
1157   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1158   b->getrowactive = PETSC_TRUE;
1159   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1160 
1161   if ((!S) && (!T) && (!b->isTI)) {
1162     if (ncols) *ncols = 0;
1163     if (cols) *cols = NULL;
1164     if (values) *values = NULL;
1165     PetscFunctionReturn(0);
1166   }
1167 
1168   if (T || b->isTI) {
1169     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1170     c = nzaij;
1171     for (i = 0; i < nzaij; i++) {
1172       /* check if this row contains a diagonal entry */
1173       if (colsaij[i] == r) {
1174         diag = PETSC_TRUE;
1175         c    = i;
1176       }
1177     }
1178   } else nzaij = c = 0;
1179 
1180   /* calculate size of row */
1181   nz = 0;
1182   if (S) nz += q;
1183   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1184 
1185   if (cols || values) {
1186     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1187     for (i = 0; i < q; i++) {
1188       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1189       v[i] = 0.0;
1190     }
1191     if (b->isTI) {
1192       for (i = 0; i < nzaij; i++) {
1193         for (j = 0; j < q; j++) {
1194           idx[i * q + j] = colsaij[i] * q + j;
1195           v[i * q + j]   = (j == s ? vaij[i] : 0);
1196         }
1197       }
1198     } else if (T) {
1199       for (i = 0; i < nzaij; i++) {
1200         for (j = 0; j < q; j++) {
1201           idx[i * q + j] = colsaij[i] * q + j;
1202           v[i * q + j]   = vaij[i] * T[s + j * p];
1203         }
1204       }
1205     }
1206     if (S) {
1207       for (j = 0; j < q; j++) {
1208         idx[c * q + j] = r * q + j;
1209         v[c * q + j] += S[s + j * p];
1210       }
1211     }
1212   }
1213 
1214   if (ncols) *ncols = nz;
1215   if (cols) *cols = idx;
1216   if (values) *values = v;
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1221   PetscFunctionBegin;
1222   if (nz) *nz = 0;
1223   PetscCall(PetscFree2(*idx, *v));
1224   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) {
1229   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1230   Mat            AIJ  = b->A;
1231   PetscBool      diag = PETSC_FALSE;
1232   Mat            MatAIJ, MatOAIJ;
1233   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1234   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1235   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;
1236 
1237   PetscFunctionBegin;
1238   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1239   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1240   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1241   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1242   b->getrowactive = PETSC_TRUE;
1243   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1244   lrow = row - rstart;
1245 
1246   if ((!S) && (!T) && (!b->isTI)) {
1247     if (ncols) *ncols = 0;
1248     if (cols) *cols = NULL;
1249     if (values) *values = NULL;
1250     PetscFunctionReturn(0);
1251   }
1252 
1253   r = lrow / p;
1254   s = lrow % p;
1255 
1256   if (T || b->isTI) {
1257     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1258     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1259     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1260 
1261     c = ncolsaij + ncolsoaij;
1262     for (i = 0; i < ncolsaij; i++) {
1263       /* check if this row contains a diagonal entry */
1264       if (colsaij[i] == r) {
1265         diag = PETSC_TRUE;
1266         c    = i;
1267       }
1268     }
1269   } else c = 0;
1270 
1271   /* calculate size of row */
1272   nz = 0;
1273   if (S) nz += q;
1274   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1275 
1276   if (cols || values) {
1277     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1278     for (i = 0; i < q; i++) {
1279       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1280       v[i] = 0.0;
1281     }
1282     if (b->isTI) {
1283       for (i = 0; i < ncolsaij; i++) {
1284         for (j = 0; j < q; j++) {
1285           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1286           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1287         }
1288       }
1289       for (i = 0; i < ncolsoaij; i++) {
1290         for (j = 0; j < q; j++) {
1291           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1292           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1293         }
1294       }
1295     } else if (T) {
1296       for (i = 0; i < ncolsaij; i++) {
1297         for (j = 0; j < q; j++) {
1298           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1299           v[i * q + j]   = vals[i] * T[s + j * p];
1300         }
1301       }
1302       for (i = 0; i < ncolsoaij; i++) {
1303         for (j = 0; j < q; j++) {
1304           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1305           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1306         }
1307       }
1308     }
1309     if (S) {
1310       for (j = 0; j < q; j++) {
1311         idx[c * q + j] = (r + rstart / p) * q + j;
1312         v[c * q + j] += S[s + j * p];
1313       }
1314     }
1315   }
1316 
1317   if (ncols) *ncols = nz;
1318   if (cols) *cols = idx;
1319   if (values) *values = v;
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1324   PetscFunctionBegin;
1325   PetscCall(PetscFree2(*idx, *v));
1326   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
1331   Mat A;
1332 
1333   PetscFunctionBegin;
1334   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1335   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1336   PetscCall(MatDestroy(&A));
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 /* ---------------------------------------------------------------------------------- */
1341 /*@C
1342   MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form:
1343 
1344     [I \otimes S + A \otimes T]
1345 
1346   where
1347     S is a dense (p \times q) matrix
1348     T is a dense (p \times q) matrix
1349     A is an `MATAIJ`  (n \times n) matrix
1350     I is the identity matrix
1351   The resulting matrix is (np \times nq)
1352 
1353   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1354 
1355   Collective
1356 
1357   Input Parameters:
1358 + A - the `MATAIJ` matrix
1359 . p - number of rows in S and T
1360 . q - number of columns in S and T
1361 . S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1362 - T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1363 
1364   Output Parameter:
1365 . kaij - the new `MATKAIJ` matrix
1366 
1367   Notes:
1368   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1369 
1370   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1371 
1372   Developer Note:
1373   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1374   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1375   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1376   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1377 
1378   Level: advanced
1379 
1380 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1381 @*/
1382 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) {
1383   PetscFunctionBegin;
1384   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1385   PetscCall(MatSetType(*kaij, MATKAIJ));
1386   PetscCall(MatKAIJSetAIJ(*kaij, A));
1387   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1388   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1389   PetscCall(MatSetUp(*kaij));
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 /*MC
1394   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1395     [I \otimes S + A \otimes T],
1396   where
1397     S is a dense (p \times q) matrix,
1398     T is a dense (p \times q) matrix,
1399     A is an AIJ  (n \times n) matrix,
1400     and I is the identity matrix.
1401   The resulting matrix is (np \times nq).
1402 
1403   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1404 
1405   Note:
1406   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1407   where x and b are column vectors containing the row-major representations of X and B.
1408 
1409   Level: advanced
1410 
1411 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1412 M*/
1413 
1414 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) {
1415   Mat_MPIKAIJ *b;
1416   PetscMPIInt  size;
1417 
1418   PetscFunctionBegin;
1419   PetscCall(PetscNew(&b));
1420   A->data = (void *)b;
1421 
1422   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1423 
1424   b->w = NULL;
1425   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1426   if (size == 1) {
1427     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1428     A->ops->destroy             = MatDestroy_SeqKAIJ;
1429     A->ops->mult                = MatMult_SeqKAIJ;
1430     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1431     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1432     A->ops->getrow              = MatGetRow_SeqKAIJ;
1433     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1434     A->ops->sor                 = MatSOR_SeqKAIJ;
1435     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1436   } else {
1437     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1438     A->ops->destroy             = MatDestroy_MPIKAIJ;
1439     A->ops->mult                = MatMult_MPIKAIJ;
1440     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1441     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1442     A->ops->getrow              = MatGetRow_MPIKAIJ;
1443     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1444     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1445     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1446   }
1447   A->ops->setup           = MatSetUp_KAIJ;
1448   A->ops->view            = MatView_KAIJ;
1449   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1450   PetscFunctionReturn(0);
1451 }
1452