xref: /petsc/src/mat/impls/kaij/kaij.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928) !
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
32 
33    Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
34 
35    Input Parameter:
36 .  A - the `MATKAIJ` matrix
37 
38    Output Parameter:
39 .  B - the `MATAIJ` matrix
40 
41    Level: advanced
42 
43    Note:
44    The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
45 
46 .seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
47 @*/
48 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) {
49   PetscBool ismpikaij, isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(0);
64 }
65 
66 /*@C
67    MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix
68 
69    Not Collective; the entire S is stored and returned independently on all processes.
70 
71    Input Parameter:
72 .  A - the `MATKAIJ` matrix
73 
74    Output Parameters:
75 +  m - the number of rows in S
76 .  n - the number of columns in S
77 -  S - the S matrix, in form of a scalar array in column-major format
78 
79    Note:
80    All output parameters are optional (pass NULL if not desired)
81 
82    Level: advanced
83 
84 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) {
87   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
88   PetscFunctionBegin;
89   if (m) *m = b->p;
90   if (n) *n = b->q;
91   if (S) *S = b->S;
92   PetscFunctionReturn(0);
93 }
94 
95 /*@C
96    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix
97 
98    Not Collective; the entire S is stored and returned independently on all processes.
99 
100    Input Parameter:
101 .  A - the `MATKAIJ` matrix
102 
103    Output Parameters:
104 +  m - the number of rows in S
105 .  n - the number of columns in S
106 -  S - the S matrix, in form of a scalar array in column-major format
107 
108    Note:
109    All output parameters are optional (pass NULL if not desired)
110 
111    Level: advanced
112 
113 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) {
116   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
117   PetscFunctionBegin;
118   if (m) *m = b->p;
119   if (n) *n = b->q;
120   if (S) *S = b->S;
121   PetscFunctionReturn(0);
122 }
123 
124 /*@C
125   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
126 
127   Not collective
128 
129   Input Parameter:
130 . A - the `MATKAIJ` matrix
131 
132   Output Parameter:
133 . S - location of pointer to array obtained with `MatKAIJGetS()`
134 
135   Note:
136   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 
141 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
142 @*/
143 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscCall(PetscObjectStateIncrease((PetscObject)A));
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the `MATKAIJ` matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with `MatKAIJGetS()`
160 
161   Note:
162   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
163   If NULL is passed, it will not attempt to zero the array pointer.
164 
165   Level: advanced
166 
167 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) {
170   PetscFunctionBegin;
171   if (S) *S = NULL;
172   PetscFunctionReturn(0);
173 }
174 
175 /*@C
176    MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix
177 
178    Not Collective; the entire T is stored and returned independently on all processes
179 
180    Input Parameter:
181 .  A - the `MATKAIJ` matrix
182 
183    Output Parameters:
184 +  m - the number of rows in T
185 .  n - the number of columns in T
186 -  T - the T matrix, in form of a scalar array in column-major format
187 
188    Note:
189    All output parameters are optional (pass NULL if not desired)
190 
191    Level: advanced
192 
193 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) {
196   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
197   PetscFunctionBegin;
198   if (m) *m = b->p;
199   if (n) *n = b->q;
200   if (T) *T = b->T;
201   PetscFunctionReturn(0);
202 }
203 
204 /*@C
205    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix
206 
207    Not Collective; the entire T is stored and returned independently on all processes
208 
209    Input Parameter:
210 .  A - the `MATKAIJ` matrix
211 
212    Output Parameters:
213 +  m - the number of rows in T
214 .  n - the number of columns in T
215 -  T - the T matrix, in form of a scalar array in column-major format
216 
217    Note: All output parameters are optional (pass NULL if not desired)
218 
219    Level: advanced
220 
221 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
222 @*/
223 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the `MATKAIJ` matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with `MatKAIJGetS()`
242 
243   Note:
244   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
245   If NULL is passed, it will not attempt to zero the array pointer.
246 
247   Level: advanced
248 
249 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) {
252   PetscFunctionBegin;
253   if (T) *T = NULL;
254   PetscCall(PetscObjectStateIncrease((PetscObject)A));
255   PetscFunctionReturn(0);
256 }
257 
258 /*@C
259   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
260 
261   Not collective
262 
263   Input Parameter:
264 . A - the `MATKAIJ` matrix
265 
266   Output Parameter:
267 . T - location of pointer to array obtained with `MatKAIJGetS()`
268 
269   Note:
270   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
271   If NULL is passed, it will not attempt to zero the array pointer.
272 
273   Level: advanced
274 
275 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
276 @*/
277 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) {
278   PetscFunctionBegin;
279   if (T) *T = NULL;
280   PetscFunctionReturn(0);
281 }
282 
283 /*@
284    MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
285 
286    Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
287 
288    Input Parameters:
289 +  A - the `MATKAIJ` matrix
290 -  B - the `MATAIJ` matrix
291 
292    Notes:
293    This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
294 
295    Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
296 
297    Level: advanced
298 
299 .seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
300 @*/
301 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) {
302   PetscMPIInt size;
303   PetscBool   flg;
304 
305   PetscFunctionBegin;
306   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
307   if (size == 1) {
308     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
309     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
311     a->AIJ         = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
314     a->A           = B;
315   }
316   PetscCall(PetscObjectReference((PetscObject)B));
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the `MATKAIJ` matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes:
332    The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix.
333 
334    The S matrix is copied, so the user can destroy this array.
335 
336    Level: advanced
337 
338 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
339 @*/
340 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) {
341   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
342 
343   PetscFunctionBegin;
344   PetscCall(PetscFree(a->S));
345   if (S) {
346     PetscCall(PetscMalloc1(p * q, &a->S));
347     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
348   } else a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
357 
358    Logically Collective.
359 
360    Input Parameter:
361 .  A - the `MATKAIJ` matrix
362 
363   Output Parameter:
364 .  identity - the Boolean value
365 
366    Level: Advanced
367 
368 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
369 @*/
370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) {
371   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
372   PetscInt     i, j;
373 
374   PetscFunctionBegin;
375   if (a->p != a->q) {
376     *identity = PETSC_FALSE;
377     PetscFunctionReturn(0);
378   } else *identity = PETSC_TRUE;
379   if (!a->isTI || a->S) {
380     for (i = 0; i < a->p && *identity; i++) {
381       for (j = 0; j < a->p && *identity; j++) {
382         if (i != j) {
383           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
384           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385         } else {
386           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
387           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388         }
389       }
390     }
391   }
392   PetscFunctionReturn(0);
393 }
394 
395 /*@C
396    MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix
397 
398    Logically Collective; the entire T is stored independently on all processes.
399 
400    Input Parameters:
401 +  A - the `MATKAIJ` matrix
402 .  p - the number of rows in S
403 .  q - the number of columns in S
404 -  T - the T matrix, in form of a scalar array in column-major format
405 
406    Notes:
407    The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix.
408 
409    The T matrix is copied, so the user can destroy this array.
410 
411    Level: Advanced
412 
413 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
414 @*/
415 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) {
416   PetscInt     i, j;
417   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
418   PetscBool    isTI = PETSC_FALSE;
419 
420   PetscFunctionBegin;
421   /* check if T is an identity matrix */
422   if (T && (p == q)) {
423     isTI = PETSC_TRUE;
424     for (i = 0; i < p; i++) {
425       for (j = 0; j < q; j++) {
426         if (i == j) {
427           /* diagonal term must be 1 */
428           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
429         } else {
430           /* off-diagonal term must be 0 */
431           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
432         }
433       }
434     }
435   }
436   a->isTI = isTI;
437 
438   PetscCall(PetscFree(a->T));
439   if (T && (!isTI)) {
440     PetscCall(PetscMalloc1(p * q, &a->T));
441     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
442   } else a->T = NULL;
443 
444   a->p = p;
445   a->q = q;
446   PetscFunctionReturn(0);
447 }
448 
449 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) {
450   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
451 
452   PetscFunctionBegin;
453   PetscCall(MatDestroy(&b->AIJ));
454   PetscCall(PetscFree(b->S));
455   PetscCall(PetscFree(b->T));
456   PetscCall(PetscFree(b->ibdiag));
457   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
458   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
459   PetscCall(PetscFree(A->data));
460   PetscFunctionReturn(0);
461 }
462 
463 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) {
464   Mat_MPIKAIJ     *a;
465   Mat_MPIAIJ      *mpiaij;
466   PetscScalar     *T;
467   PetscInt         i, j;
468   PetscObjectState state;
469 
470   PetscFunctionBegin;
471   a      = (Mat_MPIKAIJ *)A->data;
472   mpiaij = (Mat_MPIAIJ *)a->A->data;
473 
474   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
475   if (state == a->state) {
476     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
477     PetscFunctionReturn(0);
478   } else {
479     PetscCall(MatDestroy(&a->AIJ));
480     PetscCall(MatDestroy(&a->OAIJ));
481     if (a->isTI) {
482       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
483        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
484        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
485        * to pass in. */
486       PetscCall(PetscMalloc1(a->p * a->q, &T));
487       for (i = 0; i < a->p; i++) {
488         for (j = 0; j < a->q; j++) {
489           if (i == j) T[i + j * a->p] = 1.0;
490           else T[i + j * a->p] = 0.0;
491         }
492       }
493     } else T = a->T;
494     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
495     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
496     if (a->isTI) PetscCall(PetscFree(T));
497     a->state = state;
498   }
499 
500   PetscFunctionReturn(0);
501 }
502 
503 PetscErrorCode MatSetUp_KAIJ(Mat A) {
504   PetscInt     n;
505   PetscMPIInt  size;
506   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
507 
508   PetscFunctionBegin;
509   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
510   if (size == 1) {
511     PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
512     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
513     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
514     PetscCall(PetscLayoutSetUp(A->rmap));
515     PetscCall(PetscLayoutSetUp(A->cmap));
516   } else {
517     Mat_MPIKAIJ *a;
518     Mat_MPIAIJ  *mpiaij;
519     IS           from, to;
520     Vec          gvec;
521 
522     a      = (Mat_MPIKAIJ *)A->data;
523     mpiaij = (Mat_MPIAIJ *)a->A->data;
524     PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
525     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
526     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
527     PetscCall(PetscLayoutSetUp(A->rmap));
528     PetscCall(PetscLayoutSetUp(A->cmap));
529 
530     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
531 
532     PetscCall(VecGetSize(mpiaij->lvec, &n));
533     PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
534     PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
535     PetscCall(VecSetBlockSize(a->w, a->q));
536     PetscCall(VecSetType(a->w, VECSEQ));
537 
538     /* create two temporary Index sets for build scatter gather */
539     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
540     PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));
541 
542     /* create temporary global vector to generate scatter context */
543     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));
544 
545     /* generate the scatter context */
546     PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));
547 
548     PetscCall(ISDestroy(&from));
549     PetscCall(ISDestroy(&to));
550     PetscCall(VecDestroy(&gvec));
551   }
552 
553   A->assembled = PETSC_TRUE;
554   PetscFunctionReturn(0);
555 }
556 
557 PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) {
558   PetscViewerFormat format;
559   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
560   Mat               B;
561   PetscInt          i;
562   PetscBool         ismpikaij;
563 
564   PetscFunctionBegin;
565   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
566   PetscCall(PetscViewerGetFormat(viewer, &format));
567   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
568     PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));
569 
570     /* Print appropriate details for S. */
571     if (!a->S) {
572       PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
573     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
574       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
575       for (i = 0; i < (a->p * a->q); i++) {
576 #if defined(PETSC_USE_COMPLEX)
577         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
578 #else
579         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
580 #endif
581       }
582       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
583     }
584 
585     /* Print appropriate details for T. */
586     if (a->isTI) {
587       PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
588     } else if (!a->T) {
589       PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
590     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
591       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
592       for (i = 0; i < (a->p * a->q); i++) {
593 #if defined(PETSC_USE_COMPLEX)
594         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
595 #else
596         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
597 #endif
598       }
599       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
600     }
601 
602     /* Now print details for the AIJ matrix, using the AIJ viewer. */
603     PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
604     if (ismpikaij) {
605       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
606       PetscCall(MatView(b->A, viewer));
607     } else {
608       PetscCall(MatView(a->AIJ, viewer));
609     }
610 
611   } else {
612     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
613     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
614     PetscCall(MatView(B, viewer));
615     PetscCall(MatDestroy(&B));
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) {
621   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
622 
623   PetscFunctionBegin;
624   PetscCall(MatDestroy(&b->AIJ));
625   PetscCall(MatDestroy(&b->OAIJ));
626   PetscCall(MatDestroy(&b->A));
627   PetscCall(VecScatterDestroy(&b->ctx));
628   PetscCall(VecDestroy(&b->w));
629   PetscCall(PetscFree(b->S));
630   PetscCall(PetscFree(b->T));
631   PetscCall(PetscFree(b->ibdiag));
632   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
633   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
634   PetscCall(PetscFree(A->data));
635   PetscFunctionReturn(0);
636 }
637 
638 /* --------------------------------------------------------------------------------------*/
639 
640 /* zz = yy + Axx */
641 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
642   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
643   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
644   const PetscScalar *s = b->S, *t = b->T;
645   const PetscScalar *x, *v, *bx;
646   PetscScalar       *y, *sums;
647   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
648   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;
649 
650   PetscFunctionBegin;
651   if (!yy) {
652     PetscCall(VecSet(zz, 0.0));
653   } else {
654     PetscCall(VecCopy(yy, zz));
655   }
656   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
657 
658   PetscCall(VecGetArrayRead(xx, &x));
659   PetscCall(VecGetArray(zz, &y));
660   idx = a->j;
661   v   = a->a;
662   ii  = a->i;
663 
664   if (b->isTI) {
665     for (i = 0; i < m; i++) {
666       jrow = ii[i];
667       n    = ii[i + 1] - jrow;
668       sums = y + p * i;
669       for (j = 0; j < n; j++) {
670         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
671       }
672     }
673     PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
674   } else if (t) {
675     for (i = 0; i < m; i++) {
676       jrow = ii[i];
677       n    = ii[i + 1] - jrow;
678       sums = y + p * i;
679       for (j = 0; j < n; j++) {
680         for (k = 0; k < p; k++) {
681           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
682         }
683       }
684     }
685     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
686      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
687      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
688      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
689     PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
690   }
691   if (s) {
692     for (i = 0; i < m; i++) {
693       sums = y + p * i;
694       bx   = x + q * i;
695       if (i < b->AIJ->cmap->n) {
696         for (j = 0; j < q; j++) {
697           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
698         }
699       }
700     }
701     PetscCall(PetscLogFlops(2.0 * m * p * q));
702   }
703 
704   PetscCall(VecRestoreArrayRead(xx, &x));
705   PetscCall(VecRestoreArray(zz, &y));
706   PetscFunctionReturn(0);
707 }
708 
709 PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) {
710   PetscFunctionBegin;
711   PetscCall(MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy));
712   PetscFunctionReturn(0);
713 }
714 
715 #include <petsc/private/kernels/blockinvert.h>
716 
717 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) {
718   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
719   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
720   const PetscScalar *S = b->S;
721   const PetscScalar *T = b->T;
722   const PetscScalar *v = a->a;
723   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
724   PetscInt           i, j, *v_pivots, dof, dof2;
725   PetscScalar       *diag, aval, *v_work;
726 
727   PetscFunctionBegin;
728   PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
729   PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");
730 
731   dof  = p;
732   dof2 = dof * dof;
733 
734   if (b->ibdiagvalid) {
735     if (values) *values = b->ibdiag;
736     PetscFunctionReturn(0);
737   }
738   if (!b->ibdiag) {
739     PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
740     PetscCall(PetscLogObjectMemory((PetscObject)A, dof2 * m * sizeof(PetscScalar)));
741   }
742   if (values) *values = b->ibdiag;
743   diag = b->ibdiag;
744 
745   PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
746   for (i = 0; i < m; i++) {
747     if (S) {
748       PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
749     } else {
750       PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
751     }
752     if (b->isTI) {
753       aval = 0;
754       for (j = ii[i]; j < ii[i + 1]; j++)
755         if (idx[j] == i) aval = v[j];
756       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
757     } else if (T) {
758       aval = 0;
759       for (j = ii[i]; j < ii[i + 1]; j++)
760         if (idx[j] == i) aval = v[j];
761       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
762     }
763     PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
764     diag += dof2;
765   }
766   PetscCall(PetscFree2(v_work, v_pivots));
767 
768   b->ibdiagvalid = PETSC_TRUE;
769   PetscFunctionReturn(0);
770 }
771 
772 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) {
773   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
774 
775   PetscFunctionBegin;
776   *B = kaij->AIJ;
777   PetscFunctionReturn(0);
778 }
779 
780 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
781   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
782   Mat            AIJ, OAIJ, B;
783   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
784   const PetscInt p = a->p, q = a->q;
785   PetscBool      ismpikaij, missing;
786 
787   PetscFunctionBegin;
788   if (reuse != MAT_REUSE_MATRIX) {
789     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
790     if (ismpikaij) {
791       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
792       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
793       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
794     } else {
795       AIJ  = a->AIJ;
796       OAIJ = NULL;
797     }
798     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
799     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
800     PetscCall(MatSetType(B, MATAIJ));
801     PetscCall(MatGetSize(AIJ, &m, NULL));
802     PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
803     if (!missing || !a->S) d = m;
804     PetscCall(PetscMalloc1(m * p, &d_nnz));
805     for (i = 0; i < m; ++i) {
806       PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
807       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
808       PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
809     }
810     if (OAIJ) {
811       PetscCall(PetscMalloc1(m * p, &o_nnz));
812       for (i = 0; i < m; ++i) {
813         PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
814         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
815         PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
816       }
817       PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
818     } else {
819       PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
820     }
821     PetscCall(PetscFree(d_nnz));
822     PetscCall(PetscFree(o_nnz));
823   } else B = *newmat;
824   PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
825   if (reuse == MAT_INPLACE_MATRIX) {
826     PetscCall(MatHeaderReplace(A, &B));
827   } else *newmat = B;
828   PetscFunctionReturn(0);
829 }
830 
831 PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
832   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
833   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
834   const PetscScalar *aa = a->a, *T = kaij->T, *v;
835   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
836   const PetscScalar *b, *xb, *idiag;
837   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
838   PetscInt           i, j, k, i2, bs, bs2, nz;
839 
840   PetscFunctionBegin;
841   its = its * lits;
842   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
843   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
844   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
845   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
846   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
847   bs  = p;
848   bs2 = bs * bs;
849 
850   if (!m) PetscFunctionReturn(0);
851 
852   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
853   idiag = kaij->ibdiag;
854   diag  = a->diag;
855 
856   if (!kaij->sor.setup) {
857     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
858     kaij->sor.setup = PETSC_TRUE;
859   }
860   y    = kaij->sor.y;
861   w    = kaij->sor.w;
862   work = kaij->sor.work;
863   t    = kaij->sor.t;
864   arr  = kaij->sor.arr;
865 
866   PetscCall(VecGetArray(xx, &x));
867   PetscCall(VecGetArrayRead(bb, &b));
868 
869   if (flag & SOR_ZERO_INITIAL_GUESS) {
870     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
871       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
872       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
873       i2 = bs;
874       idiag += bs2;
875       for (i = 1; i < m; i++) {
876         v  = aa + ai[i];
877         vi = aj + ai[i];
878         nz = diag[i] - ai[i];
879 
880         if (T) { /* b - T (Arow * x) */
881           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
882           for (j = 0; j < nz; j++) {
883             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
884           }
885           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
886           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
887         } else if (kaij->isTI) {
888           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
889           for (j = 0; j < nz; j++) {
890             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
891           }
892         } else {
893           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
894         }
895 
896         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
897         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
898 
899         idiag += bs2;
900         i2 += bs;
901       }
902       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
903       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
904       xb = t;
905     } else xb = b;
906     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
907       idiag = kaij->ibdiag + bs2 * (m - 1);
908       i2    = bs * (m - 1);
909       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
910       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
911       i2 -= bs;
912       idiag -= bs2;
913       for (i = m - 2; i >= 0; i--) {
914         v  = aa + diag[i] + 1;
915         vi = aj + diag[i] + 1;
916         nz = ai[i + 1] - diag[i] - 1;
917 
918         if (T) { /* FIXME: This branch untested */
919           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
920           /* copy all rows of x that are needed into contiguous space */
921           workt = work;
922           for (j = 0; j < nz; j++) {
923             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
924             workt += bs;
925           }
926           arrt = arr;
927           for (j = 0; j < nz; j++) {
928             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
929             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
930             arrt += bs2;
931           }
932           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
933         } else if (kaij->isTI) {
934           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
935           for (j = 0; j < nz; j++) {
936             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
937           }
938         }
939 
940         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
941         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
942 
943         idiag -= bs2;
944         i2 -= bs;
945       }
946       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
947     }
948     its--;
949   }
950   while (its--) { /* FIXME: This branch not updated */
951     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
952       i2    = 0;
953       idiag = kaij->ibdiag;
954       for (i = 0; i < m; i++) {
955         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
956 
957         v     = aa + ai[i];
958         vi    = aj + ai[i];
959         nz    = diag[i] - ai[i];
960         workt = work;
961         for (j = 0; j < nz; j++) {
962           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
963           workt += bs;
964         }
965         arrt = arr;
966         if (T) {
967           for (j = 0; j < nz; j++) {
968             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
969             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
970             arrt += bs2;
971           }
972           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
973         } else if (kaij->isTI) {
974           for (j = 0; j < nz; j++) {
975             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
976             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
977             arrt += bs2;
978           }
979           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
980         }
981         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
982 
983         v     = aa + diag[i] + 1;
984         vi    = aj + diag[i] + 1;
985         nz    = ai[i + 1] - diag[i] - 1;
986         workt = work;
987         for (j = 0; j < nz; j++) {
988           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
989           workt += bs;
990         }
991         arrt = arr;
992         if (T) {
993           for (j = 0; j < nz; j++) {
994             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
995             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
996             arrt += bs2;
997           }
998           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
999         } else if (kaij->isTI) {
1000           for (j = 0; j < nz; j++) {
1001             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1002             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1003             arrt += bs2;
1004           }
1005           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1006         }
1007 
1008         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1009         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1010 
1011         idiag += bs2;
1012         i2 += bs;
1013       }
1014       xb = t;
1015     } else xb = b;
1016     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1017       idiag = kaij->ibdiag + bs2 * (m - 1);
1018       i2    = bs * (m - 1);
1019       if (xb == b) {
1020         for (i = m - 1; i >= 0; i--) {
1021           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1022 
1023           v     = aa + ai[i];
1024           vi    = aj + ai[i];
1025           nz    = diag[i] - ai[i];
1026           workt = work;
1027           for (j = 0; j < nz; j++) {
1028             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1029             workt += bs;
1030           }
1031           arrt = arr;
1032           if (T) {
1033             for (j = 0; j < nz; j++) {
1034               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1035               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1036               arrt += bs2;
1037             }
1038             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1039           } else if (kaij->isTI) {
1040             for (j = 0; j < nz; j++) {
1041               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1042               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1043               arrt += bs2;
1044             }
1045             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1046           }
1047 
1048           v     = aa + diag[i] + 1;
1049           vi    = aj + diag[i] + 1;
1050           nz    = ai[i + 1] - diag[i] - 1;
1051           workt = work;
1052           for (j = 0; j < nz; j++) {
1053             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1054             workt += bs;
1055           }
1056           arrt = arr;
1057           if (T) {
1058             for (j = 0; j < nz; j++) {
1059               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1060               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1061               arrt += bs2;
1062             }
1063             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1064           } else if (kaij->isTI) {
1065             for (j = 0; j < nz; j++) {
1066               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1067               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1068               arrt += bs2;
1069             }
1070             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1071           }
1072 
1073           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1074           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1075         }
1076       } else {
1077         for (i = m - 1; i >= 0; i--) {
1078           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1079           v     = aa + diag[i] + 1;
1080           vi    = aj + diag[i] + 1;
1081           nz    = ai[i + 1] - diag[i] - 1;
1082           workt = work;
1083           for (j = 0; j < nz; j++) {
1084             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1085             workt += bs;
1086           }
1087           arrt = arr;
1088           if (T) {
1089             for (j = 0; j < nz; j++) {
1090               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1091               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1092               arrt += bs2;
1093             }
1094             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1095           } else if (kaij->isTI) {
1096             for (j = 0; j < nz; j++) {
1097               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1098               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1099               arrt += bs2;
1100             }
1101             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1102           }
1103           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1104           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1105         }
1106       }
1107       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1108     }
1109   }
1110 
1111   PetscCall(VecRestoreArray(xx, &x));
1112   PetscCall(VecRestoreArrayRead(bb, &b));
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 /*===================================================================================*/
1117 
1118 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1119   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1120 
1121   PetscFunctionBegin;
1122   if (!yy) {
1123     PetscCall(VecSet(zz, 0.0));
1124   } else {
1125     PetscCall(VecCopy(yy, zz));
1126   }
1127   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1128   /* start the scatter */
1129   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1130   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1131   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1132   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) {
1137   PetscFunctionBegin;
1138   PetscCall(MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy));
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) {
1143   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1144 
1145   PetscFunctionBegin;
1146   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1147   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 /* ----------------------------------------------------------------*/
1152 
1153 PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) {
1154   Mat_SeqKAIJ   *b    = (Mat_SeqKAIJ *)A->data;
1155   PetscErrorCode diag = PETSC_FALSE;
1156   PetscInt       nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1157   PetscScalar   *vaij, *v, *S = b->S, *T = b->T;
1158 
1159   PetscFunctionBegin;
1160   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1161   b->getrowactive = PETSC_TRUE;
1162   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1163 
1164   if ((!S) && (!T) && (!b->isTI)) {
1165     if (ncols) *ncols = 0;
1166     if (cols) *cols = NULL;
1167     if (values) *values = NULL;
1168     PetscFunctionReturn(0);
1169   }
1170 
1171   if (T || b->isTI) {
1172     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1173     c = nzaij;
1174     for (i = 0; i < nzaij; i++) {
1175       /* check if this row contains a diagonal entry */
1176       if (colsaij[i] == r) {
1177         diag = PETSC_TRUE;
1178         c    = i;
1179       }
1180     }
1181   } else nzaij = c = 0;
1182 
1183   /* calculate size of row */
1184   nz = 0;
1185   if (S) nz += q;
1186   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1187 
1188   if (cols || values) {
1189     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1190     for (i = 0; i < q; i++) {
1191       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1192       v[i] = 0.0;
1193     }
1194     if (b->isTI) {
1195       for (i = 0; i < nzaij; i++) {
1196         for (j = 0; j < q; j++) {
1197           idx[i * q + j] = colsaij[i] * q + j;
1198           v[i * q + j]   = (j == s ? vaij[i] : 0);
1199         }
1200       }
1201     } else if (T) {
1202       for (i = 0; i < nzaij; i++) {
1203         for (j = 0; j < q; j++) {
1204           idx[i * q + j] = colsaij[i] * q + j;
1205           v[i * q + j]   = vaij[i] * T[s + j * p];
1206         }
1207       }
1208     }
1209     if (S) {
1210       for (j = 0; j < q; j++) {
1211         idx[c * q + j] = r * q + j;
1212         v[c * q + j] += S[s + j * p];
1213       }
1214     }
1215   }
1216 
1217   if (ncols) *ncols = nz;
1218   if (cols) *cols = idx;
1219   if (values) *values = v;
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1224   PetscFunctionBegin;
1225   if (nz) *nz = 0;
1226   PetscCall(PetscFree2(*idx, *v));
1227   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) {
1232   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1233   Mat            AIJ  = b->A;
1234   PetscBool      diag = PETSC_FALSE;
1235   Mat            MatAIJ, MatOAIJ;
1236   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1237   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1238   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;
1239 
1240   PetscFunctionBegin;
1241   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1242   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1243   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1244   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1245   b->getrowactive = PETSC_TRUE;
1246   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1247   lrow = row - rstart;
1248 
1249   if ((!S) && (!T) && (!b->isTI)) {
1250     if (ncols) *ncols = 0;
1251     if (cols) *cols = NULL;
1252     if (values) *values = NULL;
1253     PetscFunctionReturn(0);
1254   }
1255 
1256   r = lrow / p;
1257   s = lrow % p;
1258 
1259   if (T || b->isTI) {
1260     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1261     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1262     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1263 
1264     c = ncolsaij + ncolsoaij;
1265     for (i = 0; i < ncolsaij; i++) {
1266       /* check if this row contains a diagonal entry */
1267       if (colsaij[i] == r) {
1268         diag = PETSC_TRUE;
1269         c    = i;
1270       }
1271     }
1272   } else c = 0;
1273 
1274   /* calculate size of row */
1275   nz = 0;
1276   if (S) nz += q;
1277   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1278 
1279   if (cols || values) {
1280     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1281     for (i = 0; i < q; i++) {
1282       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1283       v[i] = 0.0;
1284     }
1285     if (b->isTI) {
1286       for (i = 0; i < ncolsaij; i++) {
1287         for (j = 0; j < q; j++) {
1288           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1289           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1290         }
1291       }
1292       for (i = 0; i < ncolsoaij; i++) {
1293         for (j = 0; j < q; j++) {
1294           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1295           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1296         }
1297       }
1298     } else if (T) {
1299       for (i = 0; i < ncolsaij; i++) {
1300         for (j = 0; j < q; j++) {
1301           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1302           v[i * q + j]   = vals[i] * T[s + j * p];
1303         }
1304       }
1305       for (i = 0; i < ncolsoaij; i++) {
1306         for (j = 0; j < q; j++) {
1307           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1308           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1309         }
1310       }
1311     }
1312     if (S) {
1313       for (j = 0; j < q; j++) {
1314         idx[c * q + j] = (r + rstart / p) * q + j;
1315         v[c * q + j] += S[s + j * p];
1316       }
1317     }
1318   }
1319 
1320   if (ncols) *ncols = nz;
1321   if (cols) *cols = idx;
1322   if (values) *values = v;
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1327   PetscFunctionBegin;
1328   PetscCall(PetscFree2(*idx, *v));
1329   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
1334   Mat A;
1335 
1336   PetscFunctionBegin;
1337   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1338   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1339   PetscCall(MatDestroy(&A));
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 /* ---------------------------------------------------------------------------------- */
1344 /*@C
1345   MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form:
1346 
1347     [I \otimes S + A \otimes T]
1348 
1349   where
1350     S is a dense (p \times q) matrix
1351     T is a dense (p \times q) matrix
1352     A is an `MATAIJ`  (n \times n) matrix
1353     I is the identity matrix
1354   The resulting matrix is (np \times nq)
1355 
1356   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1357 
1358   Collective
1359 
1360   Input Parameters:
1361 + A - the `MATAIJ` matrix
1362 . p - number of rows in S and T
1363 . q - number of columns in S and T
1364 . S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1365 - T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1366 
1367   Output Parameter:
1368 . kaij - the new `MATKAIJ` matrix
1369 
1370   Notes:
1371   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1372 
1373   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1374 
1375   Developer Note:
1376   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1377   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1378   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1379   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1380 
1381   Level: advanced
1382 
1383 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1384 @*/
1385 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) {
1386   PetscFunctionBegin;
1387   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1388   PetscCall(MatSetType(*kaij, MATKAIJ));
1389   PetscCall(MatKAIJSetAIJ(*kaij, A));
1390   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1391   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1392   PetscCall(MatSetUp(*kaij));
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 /*MC
1397   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1398     [I \otimes S + A \otimes T],
1399   where
1400     S is a dense (p \times q) matrix,
1401     T is a dense (p \times q) matrix,
1402     A is an AIJ  (n \times n) matrix,
1403     and I is the identity matrix.
1404   The resulting matrix is (np \times nq).
1405 
1406   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1407 
1408   Note:
1409   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1410   where x and b are column vectors containing the row-major representations of X and B.
1411 
1412   Level: advanced
1413 
1414 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1415 M*/
1416 
1417 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) {
1418   Mat_MPIKAIJ *b;
1419   PetscMPIInt  size;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(PetscNewLog(A, &b));
1423   A->data = (void *)b;
1424 
1425   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1426 
1427   b->w = NULL;
1428   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1429   if (size == 1) {
1430     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1431     A->ops->destroy             = MatDestroy_SeqKAIJ;
1432     A->ops->mult                = MatMult_SeqKAIJ;
1433     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1434     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1435     A->ops->getrow              = MatGetRow_SeqKAIJ;
1436     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1437     A->ops->sor                 = MatSOR_SeqKAIJ;
1438     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1439   } else {
1440     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1441     A->ops->destroy             = MatDestroy_MPIKAIJ;
1442     A->ops->mult                = MatMult_MPIKAIJ;
1443     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1444     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1445     A->ops->getrow              = MatGetRow_MPIKAIJ;
1446     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1447     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1448     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1449   }
1450   A->ops->setup           = MatSetUp_KAIJ;
1451   A->ops->view            = MatView_KAIJ;
1452   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1453   PetscFunctionReturn(0);
1454 }
1455