xref: /petsc/src/mat/impls/kaij/kaij.c (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 /*
2   Defines the basic matrix operations for the KAIJ  matrix storage format.
3   This format is used to evaluate matrices of the form:
4 
5     [I \otimes S + A \otimes T]
6 
7   where
8     S is a dense (p \times q) matrix
9     T is a dense (p \times q) matrix
10     A is an AIJ  (n \times n) matrix
11     I is the identity matrix
12 
13   The resulting matrix is (np \times nq)
14 
15   We provide:
16      MatMult()
17      MatMultAdd()
18      MatInvertBlockDiagonal()
19   and
20      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
21 
22   This single directory handles both the sequential and parallel codes
23 */
24 
25 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
26 #include <../src/mat/utils/freespace.h>
27 #include <petsc/private/vecimpl.h>
28 
29 /*@C
30   MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
31 
32   Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
33 
34   Input Parameter:
35 . A - the `MATKAIJ` matrix
36 
37   Output Parameter:
38 . B - the `MATAIJ` matrix
39 
40   Level: advanced
41 
42   Note:
43   The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
44 
45 .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
46 @*/
47 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
48 {
49   PetscBool ismpikaij, isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 /*@C
67   MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix
68 
69   Not Collective; the entire `S` is stored and returned independently on all processes.
70 
71   Input Parameter:
72 . A - the `MATKAIJ` matrix
73 
74   Output Parameters:
75 + m - the number of rows in `S`
76 . n - the number of columns in `S`
77 - S - the S matrix, in form of a scalar array in column-major format
78 
79   Level: advanced
80 
81   Note:
82   All output parameters are optional (pass `NULL` if not desired)
83 
84 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 /*@C
97   MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix
98 
99   Not Collective; the entire `S` is stored and returned independently on all processes.
100 
101   Input Parameter:
102 . A - the `MATKAIJ` matrix
103 
104   Output Parameters:
105 + m - the number of rows in `S`
106 . n - the number of columns in `S`
107 - S - the S matrix, in form of a scalar array in column-major format
108 
109   Level: advanced
110 
111   Note:
112   All output parameters are optional (pass `NULL` if not desired)
113 
114 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
115 @*/
116 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
117 {
118   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
119   PetscFunctionBegin;
120   if (m) *m = b->p;
121   if (n) *n = b->q;
122   if (S) *S = b->S;
123   PetscFunctionReturn(PETSC_SUCCESS);
124 }
125 
126 /*@C
127   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
128 
129   Not Collective
130 
131   Input Parameters:
132 + A - the `MATKAIJ` matrix
133 - S - location of pointer to array obtained with `MatKAIJGetS()`
134 
135   Level: advanced
136 
137   Note:
138   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
139   If `NULL` is passed, it will not attempt to zero the array pointer.
140 
141 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
142 @*/
143 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
144 {
145   PetscFunctionBegin;
146   if (S) *S = NULL;
147   PetscCall(PetscObjectStateIncrease((PetscObject)A));
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 /*@C
152   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
153 
154   Not Collective
155 
156   Input Parameters:
157 + A - the `MATKAIJ` matrix
158 - S - location of pointer to array obtained with `MatKAIJGetS()`
159 
160   Level: advanced
161 
162   Note:
163   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If `NULL` is passed, it will not attempt to zero the array pointer.
165 
166 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
167 @*/
168 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
169 {
170   PetscFunctionBegin;
171   if (S) *S = NULL;
172   PetscFunctionReturn(PETSC_SUCCESS);
173 }
174 
175 /*@C
176   MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix
177 
178   Not Collective; the entire `T` is stored and returned independently on all processes
179 
180   Input Parameter:
181 . A - the `MATKAIJ` matrix
182 
183   Output Parameters:
184 + m - the number of rows in `T`
185 . n - the number of columns in `T`
186 - T - the T matrix, in form of a scalar array in column-major format
187 
188   Level: advanced
189 
190   Note:
191   All output parameters are optional (pass `NULL` if not desired)
192 
193 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(PETSC_SUCCESS);
203 }
204 
205 /*@C
206   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix
207 
208   Not Collective; the entire `T` is stored and returned independently on all processes
209 
210   Input Parameter:
211 . A - the `MATKAIJ` matrix
212 
213   Output Parameters:
214 + m - the number of rows in `T`
215 . n - the number of columns in `T`
216 - T - the T matrix, in form of a scalar array in column-major format
217 
218   Level: advanced
219 
220   Note:
221   All output parameters are optional (pass `NULL` if not desired)
222 
223 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
224 @*/
225 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
226 {
227   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
228   PetscFunctionBegin;
229   if (m) *m = b->p;
230   if (n) *n = b->q;
231   if (T) *T = b->T;
232   PetscFunctionReturn(PETSC_SUCCESS);
233 }
234 
235 /*@C
236   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
237 
238   Not Collective
239 
240   Input Parameters:
241 + A - the `MATKAIJ` matrix
242 - T - location of pointer to array obtained with `MatKAIJGetS()`
243 
244   Level: advanced
245 
246   Note:
247   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
248   If `NULL` is passed, it will not attempt to zero the array pointer.
249 
250 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
251 @*/
252 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
253 {
254   PetscFunctionBegin;
255   if (T) *T = NULL;
256   PetscCall(PetscObjectStateIncrease((PetscObject)A));
257   PetscFunctionReturn(PETSC_SUCCESS);
258 }
259 
260 /*@C
261   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
262 
263   Not Collective
264 
265   Input Parameters:
266 + A - the `MATKAIJ` matrix
267 - T - location of pointer to array obtained with `MatKAIJGetS()`
268 
269   Level: advanced
270 
271   Note:
272   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If `NULL` is passed, it will not attempt to zero the array pointer.
274 
275 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
276 @*/
277 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
278 {
279   PetscFunctionBegin;
280   if (T) *T = NULL;
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
284 /*@
285   MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
286 
287   Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
288 
289   Input Parameters:
290 + A - the `MATKAIJ` matrix
291 - B - the `MATAIJ` matrix
292 
293   Level: advanced
294 
295   Notes:
296   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
297 
298   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
299 
300 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
303 {
304   PetscMPIInt size;
305   PetscBool   flg;
306 
307   PetscFunctionBegin;
308   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
309   if (size == 1) {
310     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
311     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
312     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
313     a->AIJ         = B;
314   } else {
315     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
316     a->A           = B;
317   }
318   PetscCall(PetscObjectReference((PetscObject)B));
319   PetscFunctionReturn(PETSC_SUCCESS);
320 }
321 
322 /*@C
323   MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix
324 
325   Logically Collective; the entire `S` is stored independently on all processes.
326 
327   Input Parameters:
328 + A - the `MATKAIJ` matrix
329 . p - the number of rows in `S`
330 . q - the number of columns in `S`
331 - S - the S matrix, in form of a scalar array in column-major format
332 
333   Level: advanced
334 
335   Notes:
336   The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.
337 
338   The `S` matrix is copied, so the user can destroy this array.
339 
340 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
341 @*/
342 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
343 {
344   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
345 
346   PetscFunctionBegin;
347   PetscCall(PetscFree(a->S));
348   if (S) {
349     PetscCall(PetscMalloc1(p * q, &a->S));
350     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
351   } else a->S = NULL;
352 
353   a->p = p;
354   a->q = q;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 /*@C
359   MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.
360 
361   Logically Collective.
362 
363   Input Parameter:
364 . A - the `MATKAIJ` matrix
365 
366   Output Parameter:
367 . identity - the Boolean value
368 
369   Level: advanced
370 
371 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
372 @*/
373 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
374 {
375   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
376   PetscInt     i, j;
377 
378   PetscFunctionBegin;
379   if (a->p != a->q) {
380     *identity = PETSC_FALSE;
381     PetscFunctionReturn(PETSC_SUCCESS);
382   } else *identity = PETSC_TRUE;
383   if (!a->isTI || a->S) {
384     for (i = 0; i < a->p && *identity; i++) {
385       for (j = 0; j < a->p && *identity; j++) {
386         if (i != j) {
387           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
388           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
389         } else {
390           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
391           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
392         }
393       }
394     }
395   }
396   PetscFunctionReturn(PETSC_SUCCESS);
397 }
398 
399 /*@C
400   MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix
401 
402   Logically Collective; the entire `T` is stored independently on all processes.
403 
404   Input Parameters:
405 + A - the `MATKAIJ` matrix
406 . p - the number of rows in `S`
407 . q - the number of columns in `S`
408 - T - the `T` matrix, in form of a scalar array in column-major format
409 
410   Level: advanced
411 
412   Notes:
413   The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.
414 
415   The `T` matrix is copied, so the user can destroy this array.
416 
417 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
418 @*/
419 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
420 {
421   PetscInt     i, j;
422   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
423   PetscBool    isTI = PETSC_FALSE;
424 
425   PetscFunctionBegin;
426   /* check if T is an identity matrix */
427   if (T && (p == q)) {
428     isTI = PETSC_TRUE;
429     for (i = 0; i < p; i++) {
430       for (j = 0; j < q; j++) {
431         if (i == j) {
432           /* diagonal term must be 1 */
433           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
434         } else {
435           /* off-diagonal term must be 0 */
436           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
437         }
438       }
439     }
440   }
441   a->isTI = isTI;
442 
443   PetscCall(PetscFree(a->T));
444   if (T && (!isTI)) {
445     PetscCall(PetscMalloc1(p * q, &a->T));
446     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
447   } else a->T = NULL;
448 
449   a->p = p;
450   a->q = q;
451   PetscFunctionReturn(PETSC_SUCCESS);
452 }
453 
454 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
455 {
456   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
457 
458   PetscFunctionBegin;
459   PetscCall(MatDestroy(&b->AIJ));
460   PetscCall(PetscFree(b->S));
461   PetscCall(PetscFree(b->T));
462   PetscCall(PetscFree(b->ibdiag));
463   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
464   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
465   PetscCall(PetscFree(A->data));
466   PetscFunctionReturn(PETSC_SUCCESS);
467 }
468 
469 static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
470 {
471   Mat_MPIKAIJ     *a;
472   Mat_MPIAIJ      *mpiaij;
473   PetscScalar     *T;
474   PetscInt         i, j;
475   PetscObjectState state;
476 
477   PetscFunctionBegin;
478   a      = (Mat_MPIKAIJ *)A->data;
479   mpiaij = (Mat_MPIAIJ *)a->A->data;
480 
481   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
482   if (state == a->state) {
483     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
484     PetscFunctionReturn(PETSC_SUCCESS);
485   } else {
486     PetscCall(MatDestroy(&a->AIJ));
487     PetscCall(MatDestroy(&a->OAIJ));
488     if (a->isTI) {
489       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
490        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
491        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
492        * to pass in. */
493       PetscCall(PetscMalloc1(a->p * a->q, &T));
494       for (i = 0; i < a->p; i++) {
495         for (j = 0; j < a->q; j++) {
496           if (i == j) T[i + j * a->p] = 1.0;
497           else T[i + j * a->p] = 0.0;
498         }
499       }
500     } else T = a->T;
501     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
502     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
503     if (a->isTI) PetscCall(PetscFree(T));
504     a->state = state;
505   }
506 
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 static PetscErrorCode MatSetUp_KAIJ(Mat A)
511 {
512   PetscInt     n;
513   PetscMPIInt  size;
514   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
515 
516   PetscFunctionBegin;
517   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
518   if (size == 1) {
519     PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
520     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
521     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
522     PetscCall(PetscLayoutSetUp(A->rmap));
523     PetscCall(PetscLayoutSetUp(A->cmap));
524   } else {
525     Mat_MPIKAIJ *a;
526     Mat_MPIAIJ  *mpiaij;
527     IS           from, to;
528     Vec          gvec;
529 
530     a      = (Mat_MPIKAIJ *)A->data;
531     mpiaij = (Mat_MPIAIJ *)a->A->data;
532     PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
533     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
534     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
535     PetscCall(PetscLayoutSetUp(A->rmap));
536     PetscCall(PetscLayoutSetUp(A->cmap));
537 
538     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
539 
540     PetscCall(VecGetSize(mpiaij->lvec, &n));
541     PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
542     PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
543     PetscCall(VecSetBlockSize(a->w, a->q));
544     PetscCall(VecSetType(a->w, VECSEQ));
545 
546     /* create two temporary Index sets for build scatter gather */
547     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
548     PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));
549 
550     /* create temporary global vector to generate scatter context */
551     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));
552 
553     /* generate the scatter context */
554     PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));
555 
556     PetscCall(ISDestroy(&from));
557     PetscCall(ISDestroy(&to));
558     PetscCall(VecDestroy(&gvec));
559   }
560 
561   A->assembled = PETSC_TRUE;
562   PetscFunctionReturn(PETSC_SUCCESS);
563 }
564 
565 static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
566 {
567   PetscViewerFormat format;
568   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
569   Mat               B;
570   PetscInt          i;
571   PetscBool         ismpikaij;
572 
573   PetscFunctionBegin;
574   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
575   PetscCall(PetscViewerGetFormat(viewer, &format));
576   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
577     PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));
578 
579     /* Print appropriate details for S. */
580     if (!a->S) {
581       PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
582     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
583       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
584       for (i = 0; i < (a->p * a->q); i++) {
585 #if defined(PETSC_USE_COMPLEX)
586         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
587 #else
588         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
589 #endif
590       }
591       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
592     }
593 
594     /* Print appropriate details for T. */
595     if (a->isTI) {
596       PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
597     } else if (!a->T) {
598       PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
599     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
600       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
601       for (i = 0; i < (a->p * a->q); i++) {
602 #if defined(PETSC_USE_COMPLEX)
603         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
604 #else
605         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
606 #endif
607       }
608       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
609     }
610 
611     /* Now print details for the AIJ matrix, using the AIJ viewer. */
612     PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
613     if (ismpikaij) {
614       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
615       PetscCall(MatView(b->A, viewer));
616     } else {
617       PetscCall(MatView(a->AIJ, viewer));
618     }
619 
620   } else {
621     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
622     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
623     PetscCall(MatView(B, viewer));
624     PetscCall(MatDestroy(&B));
625   }
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 
629 static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
630 {
631   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
632 
633   PetscFunctionBegin;
634   PetscCall(MatDestroy(&b->AIJ));
635   PetscCall(MatDestroy(&b->OAIJ));
636   PetscCall(MatDestroy(&b->A));
637   PetscCall(VecScatterDestroy(&b->ctx));
638   PetscCall(VecDestroy(&b->w));
639   PetscCall(PetscFree(b->S));
640   PetscCall(PetscFree(b->T));
641   PetscCall(PetscFree(b->ibdiag));
642   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
643   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
644   PetscCall(PetscFree(A->data));
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
648 /* zz = yy + Axx */
649 static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
650 {
651   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
652   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
653   const PetscScalar *s = b->S, *t = b->T;
654   const PetscScalar *x, *v, *bx;
655   PetscScalar       *y, *sums;
656   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
657   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;
658 
659   PetscFunctionBegin;
660   if (!yy) {
661     PetscCall(VecSet(zz, 0.0));
662   } else {
663     PetscCall(VecCopy(yy, zz));
664   }
665   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);
666 
667   PetscCall(VecGetArrayRead(xx, &x));
668   PetscCall(VecGetArray(zz, &y));
669   idx = a->j;
670   v   = a->a;
671   ii  = a->i;
672 
673   if (b->isTI) {
674     for (i = 0; i < m; i++) {
675       jrow = ii[i];
676       n    = ii[i + 1] - jrow;
677       sums = y + p * i;
678       for (j = 0; j < n; j++) {
679         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
680       }
681     }
682     PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
683   } else if (t) {
684     for (i = 0; i < m; i++) {
685       jrow = ii[i];
686       n    = ii[i + 1] - jrow;
687       sums = y + p * i;
688       for (j = 0; j < n; j++) {
689         for (k = 0; k < p; k++) {
690           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
691         }
692       }
693     }
694     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
695      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
696      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
697      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
698     PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
699   }
700   if (s) {
701     for (i = 0; i < m; i++) {
702       sums = y + p * i;
703       bx   = x + q * i;
704       if (i < b->AIJ->cmap->n) {
705         for (j = 0; j < q; j++) {
706           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
707         }
708       }
709     }
710     PetscCall(PetscLogFlops(2.0 * m * p * q));
711   }
712 
713   PetscCall(VecRestoreArrayRead(xx, &x));
714   PetscCall(VecRestoreArray(zz, &y));
715   PetscFunctionReturn(PETSC_SUCCESS);
716 }
717 
718 static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
719 {
720   PetscFunctionBegin;
721   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
722   PetscFunctionReturn(PETSC_SUCCESS);
723 }
724 
725 #include <petsc/private/kernels/blockinvert.h>
726 
727 static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
728 {
729   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
730   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
731   const PetscScalar *S = b->S;
732   const PetscScalar *T = b->T;
733   const PetscScalar *v = a->a;
734   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
735   PetscInt           i, j, *v_pivots, dof, dof2;
736   PetscScalar       *diag, aval, *v_work;
737 
738   PetscFunctionBegin;
739   PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
740   PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");
741 
742   dof  = p;
743   dof2 = dof * dof;
744 
745   if (b->ibdiagvalid) {
746     if (values) *values = b->ibdiag;
747     PetscFunctionReturn(PETSC_SUCCESS);
748   }
749   if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
750   if (values) *values = b->ibdiag;
751   diag = b->ibdiag;
752 
753   PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
754   for (i = 0; i < m; i++) {
755     if (S) {
756       PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
757     } else {
758       PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
759     }
760     if (b->isTI) {
761       aval = 0;
762       for (j = ii[i]; j < ii[i + 1]; j++)
763         if (idx[j] == i) aval = v[j];
764       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
765     } else if (T) {
766       aval = 0;
767       for (j = ii[i]; j < ii[i + 1]; j++)
768         if (idx[j] == i) aval = v[j];
769       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
770     }
771     PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
772     diag += dof2;
773   }
774   PetscCall(PetscFree2(v_work, v_pivots));
775 
776   b->ibdiagvalid = PETSC_TRUE;
777   PetscFunctionReturn(PETSC_SUCCESS);
778 }
779 
780 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
781 {
782   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
783 
784   PetscFunctionBegin;
785   *B = kaij->AIJ;
786   PetscFunctionReturn(PETSC_SUCCESS);
787 }
788 
789 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
790 {
791   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
792   Mat            AIJ, OAIJ, B;
793   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
794   const PetscInt p = a->p, q = a->q;
795   PetscBool      ismpikaij, missing;
796 
797   PetscFunctionBegin;
798   if (reuse != MAT_REUSE_MATRIX) {
799     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
800     if (ismpikaij) {
801       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
802       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
803       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
804     } else {
805       AIJ  = a->AIJ;
806       OAIJ = NULL;
807     }
808     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
809     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
810     PetscCall(MatSetType(B, MATAIJ));
811     PetscCall(MatGetSize(AIJ, &m, NULL));
812     PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
813     if (!missing || !a->S) d = m;
814     PetscCall(PetscMalloc1(m * p, &d_nnz));
815     for (i = 0; i < m; ++i) {
816       PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
817       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
818       PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
819     }
820     if (OAIJ) {
821       PetscCall(PetscMalloc1(m * p, &o_nnz));
822       for (i = 0; i < m; ++i) {
823         PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
824         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
825         PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
826       }
827       PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
828     } else {
829       PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
830     }
831     PetscCall(PetscFree(d_nnz));
832     PetscCall(PetscFree(o_nnz));
833   } else B = *newmat;
834   PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
835   if (reuse == MAT_INPLACE_MATRIX) {
836     PetscCall(MatHeaderReplace(A, &B));
837   } else *newmat = B;
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
842 {
843   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
844   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
845   const PetscScalar *aa = a->a, *T = kaij->T, *v;
846   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
847   const PetscScalar *b, *xb, *idiag;
848   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
849   PetscInt           i, j, k, i2, bs, bs2, nz;
850 
851   PetscFunctionBegin;
852   its = its * lits;
853   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
854   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
855   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
856   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
857   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
858   bs  = p;
859   bs2 = bs * bs;
860 
861   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
862 
863   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
864   idiag = kaij->ibdiag;
865   diag  = a->diag;
866 
867   if (!kaij->sor.setup) {
868     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
869     kaij->sor.setup = PETSC_TRUE;
870   }
871   y    = kaij->sor.y;
872   w    = kaij->sor.w;
873   work = kaij->sor.work;
874   t    = kaij->sor.t;
875   arr  = kaij->sor.arr;
876 
877   PetscCall(VecGetArray(xx, &x));
878   PetscCall(VecGetArrayRead(bb, &b));
879 
880   if (flag & SOR_ZERO_INITIAL_GUESS) {
881     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
882       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
883       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
884       i2 = bs;
885       idiag += bs2;
886       for (i = 1; i < m; i++) {
887         v  = aa + ai[i];
888         vi = aj + ai[i];
889         nz = diag[i] - ai[i];
890 
891         if (T) { /* b - T (Arow * x) */
892           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
893           for (j = 0; j < nz; j++) {
894             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
895           }
896           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
897           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
898         } else if (kaij->isTI) {
899           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
900           for (j = 0; j < nz; j++) {
901             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
902           }
903         } else {
904           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
905         }
906 
907         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
908         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
909 
910         idiag += bs2;
911         i2 += bs;
912       }
913       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
914       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
915       xb = t;
916     } else xb = b;
917     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
918       idiag = kaij->ibdiag + bs2 * (m - 1);
919       i2    = bs * (m - 1);
920       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
921       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
922       i2 -= bs;
923       idiag -= bs2;
924       for (i = m - 2; i >= 0; i--) {
925         v  = aa + diag[i] + 1;
926         vi = aj + diag[i] + 1;
927         nz = ai[i + 1] - diag[i] - 1;
928 
929         if (T) { /* FIXME: This branch untested */
930           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
931           /* copy all rows of x that are needed into contiguous space */
932           workt = work;
933           for (j = 0; j < nz; j++) {
934             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
935             workt += bs;
936           }
937           arrt = arr;
938           for (j = 0; j < nz; j++) {
939             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
940             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
941             arrt += bs2;
942           }
943           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
944         } else if (kaij->isTI) {
945           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
946           for (j = 0; j < nz; j++) {
947             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
948           }
949         }
950 
951         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
952         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
953 
954         idiag -= bs2;
955         i2 -= bs;
956       }
957       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
958     }
959     its--;
960   }
961   while (its--) { /* FIXME: This branch not updated */
962     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
963       i2    = 0;
964       idiag = kaij->ibdiag;
965       for (i = 0; i < m; i++) {
966         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
967 
968         v     = aa + ai[i];
969         vi    = aj + ai[i];
970         nz    = diag[i] - ai[i];
971         workt = work;
972         for (j = 0; j < nz; j++) {
973           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
974           workt += bs;
975         }
976         arrt = arr;
977         if (T) {
978           for (j = 0; j < nz; j++) {
979             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
980             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
981             arrt += bs2;
982           }
983           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
984         } else if (kaij->isTI) {
985           for (j = 0; j < nz; j++) {
986             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
987             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
988             arrt += bs2;
989           }
990           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
991         }
992         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
993 
994         v     = aa + diag[i] + 1;
995         vi    = aj + diag[i] + 1;
996         nz    = ai[i + 1] - diag[i] - 1;
997         workt = work;
998         for (j = 0; j < nz; j++) {
999           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1000           workt += bs;
1001         }
1002         arrt = arr;
1003         if (T) {
1004           for (j = 0; j < nz; j++) {
1005             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1006             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1007             arrt += bs2;
1008           }
1009           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1010         } else if (kaij->isTI) {
1011           for (j = 0; j < nz; j++) {
1012             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1013             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1014             arrt += bs2;
1015           }
1016           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1017         }
1018 
1019         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1020         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1021 
1022         idiag += bs2;
1023         i2 += bs;
1024       }
1025       xb = t;
1026     } else xb = b;
1027     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1028       idiag = kaij->ibdiag + bs2 * (m - 1);
1029       i2    = bs * (m - 1);
1030       if (xb == b) {
1031         for (i = m - 1; i >= 0; i--) {
1032           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1033 
1034           v     = aa + ai[i];
1035           vi    = aj + ai[i];
1036           nz    = diag[i] - ai[i];
1037           workt = work;
1038           for (j = 0; j < nz; j++) {
1039             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1040             workt += bs;
1041           }
1042           arrt = arr;
1043           if (T) {
1044             for (j = 0; j < nz; j++) {
1045               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1046               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1047               arrt += bs2;
1048             }
1049             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1050           } else if (kaij->isTI) {
1051             for (j = 0; j < nz; j++) {
1052               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1053               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1054               arrt += bs2;
1055             }
1056             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1057           }
1058 
1059           v     = aa + diag[i] + 1;
1060           vi    = aj + diag[i] + 1;
1061           nz    = ai[i + 1] - diag[i] - 1;
1062           workt = work;
1063           for (j = 0; j < nz; j++) {
1064             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1065             workt += bs;
1066           }
1067           arrt = arr;
1068           if (T) {
1069             for (j = 0; j < nz; j++) {
1070               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1071               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1072               arrt += bs2;
1073             }
1074             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1075           } else if (kaij->isTI) {
1076             for (j = 0; j < nz; j++) {
1077               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1078               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1079               arrt += bs2;
1080             }
1081             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1082           }
1083 
1084           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1085           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1086         }
1087       } else {
1088         for (i = m - 1; i >= 0; i--) {
1089           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1090           v     = aa + diag[i] + 1;
1091           vi    = aj + diag[i] + 1;
1092           nz    = ai[i + 1] - diag[i] - 1;
1093           workt = work;
1094           for (j = 0; j < nz; j++) {
1095             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1096             workt += bs;
1097           }
1098           arrt = arr;
1099           if (T) {
1100             for (j = 0; j < nz; j++) {
1101               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1102               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1103               arrt += bs2;
1104             }
1105             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1106           } else if (kaij->isTI) {
1107             for (j = 0; j < nz; j++) {
1108               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1109               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1110               arrt += bs2;
1111             }
1112             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1113           }
1114           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1115           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1116         }
1117       }
1118       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1119     }
1120   }
1121 
1122   PetscCall(VecRestoreArray(xx, &x));
1123   PetscCall(VecRestoreArrayRead(bb, &b));
1124   PetscFunctionReturn(PETSC_SUCCESS);
1125 }
1126 
1127 /*===================================================================================*/
1128 
1129 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1130 {
1131   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1132 
1133   PetscFunctionBegin;
1134   if (!yy) {
1135     PetscCall(VecSet(zz, 0.0));
1136   } else {
1137     PetscCall(VecCopy(yy, zz));
1138   }
1139   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1140   /* start the scatter */
1141   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1142   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1143   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1144   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1145   PetscFunctionReturn(PETSC_SUCCESS);
1146 }
1147 
1148 static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1149 {
1150   PetscFunctionBegin;
1151   PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1156 {
1157   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1161   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1166 {
1167   Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
1168   PetscBool    diag = PETSC_FALSE;
1169   PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1170   PetscScalar *vaij, *v, *S = b->S, *T = b->T;
1171 
1172   PetscFunctionBegin;
1173   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1174   b->getrowactive = PETSC_TRUE;
1175   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1176 
1177   if ((!S) && (!T) && (!b->isTI)) {
1178     if (ncols) *ncols = 0;
1179     if (cols) *cols = NULL;
1180     if (values) *values = NULL;
1181     PetscFunctionReturn(PETSC_SUCCESS);
1182   }
1183 
1184   if (T || b->isTI) {
1185     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1186     c = nzaij;
1187     for (i = 0; i < nzaij; i++) {
1188       /* check if this row contains a diagonal entry */
1189       if (colsaij[i] == r) {
1190         diag = PETSC_TRUE;
1191         c    = i;
1192       }
1193     }
1194   } else nzaij = c = 0;
1195 
1196   /* calculate size of row */
1197   nz = 0;
1198   if (S) nz += q;
1199   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1200 
1201   if (cols || values) {
1202     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1203     for (i = 0; i < q; i++) {
1204       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1205       v[i] = 0.0;
1206     }
1207     if (b->isTI) {
1208       for (i = 0; i < nzaij; i++) {
1209         for (j = 0; j < q; j++) {
1210           idx[i * q + j] = colsaij[i] * q + j;
1211           v[i * q + j]   = (j == s ? vaij[i] : 0);
1212         }
1213       }
1214     } else if (T) {
1215       for (i = 0; i < nzaij; i++) {
1216         for (j = 0; j < q; j++) {
1217           idx[i * q + j] = colsaij[i] * q + j;
1218           v[i * q + j]   = vaij[i] * T[s + j * p];
1219         }
1220       }
1221     }
1222     if (S) {
1223       for (j = 0; j < q; j++) {
1224         idx[c * q + j] = r * q + j;
1225         v[c * q + j] += S[s + j * p];
1226       }
1227     }
1228   }
1229 
1230   if (ncols) *ncols = nz;
1231   if (cols) *cols = idx;
1232   if (values) *values = v;
1233   PetscFunctionReturn(PETSC_SUCCESS);
1234 }
1235 
1236 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1237 {
1238   PetscFunctionBegin;
1239   PetscCall(PetscFree2(*idx, *v));
1240   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1245 {
1246   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1247   Mat            AIJ  = b->A;
1248   PetscBool      diag = PETSC_FALSE;
1249   Mat            MatAIJ, MatOAIJ;
1250   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1251   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1252   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;
1253 
1254   PetscFunctionBegin;
1255   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1256   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1257   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1258   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1259   b->getrowactive = PETSC_TRUE;
1260   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1261   lrow = row - rstart;
1262 
1263   if ((!S) && (!T) && (!b->isTI)) {
1264     if (ncols) *ncols = 0;
1265     if (cols) *cols = NULL;
1266     if (values) *values = NULL;
1267     PetscFunctionReturn(PETSC_SUCCESS);
1268   }
1269 
1270   r = lrow / p;
1271   s = lrow % p;
1272 
1273   if (T || b->isTI) {
1274     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1275     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1276     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1277 
1278     c = ncolsaij + ncolsoaij;
1279     for (i = 0; i < ncolsaij; i++) {
1280       /* check if this row contains a diagonal entry */
1281       if (colsaij[i] == r) {
1282         diag = PETSC_TRUE;
1283         c    = i;
1284       }
1285     }
1286   } else c = 0;
1287 
1288   /* calculate size of row */
1289   nz = 0;
1290   if (S) nz += q;
1291   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1292 
1293   if (cols || values) {
1294     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1295     for (i = 0; i < q; i++) {
1296       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1297       v[i] = 0.0;
1298     }
1299     if (b->isTI) {
1300       for (i = 0; i < ncolsaij; i++) {
1301         for (j = 0; j < q; j++) {
1302           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1303           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1304         }
1305       }
1306       for (i = 0; i < ncolsoaij; i++) {
1307         for (j = 0; j < q; j++) {
1308           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1309           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1310         }
1311       }
1312     } else if (T) {
1313       for (i = 0; i < ncolsaij; i++) {
1314         for (j = 0; j < q; j++) {
1315           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1316           v[i * q + j]   = vals[i] * T[s + j * p];
1317         }
1318       }
1319       for (i = 0; i < ncolsoaij; i++) {
1320         for (j = 0; j < q; j++) {
1321           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1322           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1323         }
1324       }
1325     }
1326     if (S) {
1327       for (j = 0; j < q; j++) {
1328         idx[c * q + j] = (r + rstart / p) * q + j;
1329         v[c * q + j] += S[s + j * p];
1330       }
1331     }
1332   }
1333 
1334   if (ncols) *ncols = nz;
1335   if (cols) *cols = idx;
1336   if (values) *values = v;
1337   PetscFunctionReturn(PETSC_SUCCESS);
1338 }
1339 
1340 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1341 {
1342   PetscFunctionBegin;
1343   PetscCall(PetscFree2(*idx, *v));
1344   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1345   PetscFunctionReturn(PETSC_SUCCESS);
1346 }
1347 
1348 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1349 {
1350   Mat A;
1351 
1352   PetscFunctionBegin;
1353   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1354   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1355   PetscCall(MatDestroy(&A));
1356   PetscFunctionReturn(PETSC_SUCCESS);
1357 }
1358 
1359 /*@C
1360   MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.
1361 
1362   Collective
1363 
1364   Input Parameters:
1365 + A - the `MATAIJ` matrix
1366 . p - number of rows in `S` and `T`
1367 . q - number of columns in `S` and `T`
1368 . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1369 - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1370 
1371   Output Parameter:
1372 . kaij - the new `MATKAIJ` matrix
1373 
1374   Level: advanced
1375 
1376   Notes:
1377   The created matrix is of the following form\:
1378 .vb
1379     [I \otimes S + A \otimes T]
1380 .ve
1381   where
1382 .vb
1383   S is a dense (p \times q) matrix
1384   T is a dense (p \times q) matrix
1385   A is a `MATAIJ`  (n \times n) matrix
1386   I is the identity matrix
1387 .ve
1388   The resulting matrix is (np \times nq)
1389 
1390   `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
1391   column-major format.
1392 
1393   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1394 
1395   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1396 
1397   Developer Notes:
1398   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1399   of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1400   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1401   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1402 
1403 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1404 @*/
1405 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1406 {
1407   PetscFunctionBegin;
1408   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1409   PetscCall(MatSetType(*kaij, MATKAIJ));
1410   PetscCall(MatKAIJSetAIJ(*kaij, A));
1411   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1412   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1413   PetscCall(MatSetUp(*kaij));
1414   PetscFunctionReturn(PETSC_SUCCESS);
1415 }
1416 
1417 /*MC
1418   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1419     [I \otimes S + A \otimes T],
1420   where
1421 .vb
1422     S is a dense (p \times q) matrix,
1423     T is a dense (p \times q) matrix,
1424     A is an AIJ  (n \times n) matrix,
1425     and I is the identity matrix.
1426 .ve
1427   The resulting matrix is (np \times nq).
1428 
1429   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1430 
1431   Level: advanced
1432 
1433   Note:
1434   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1435   where x and b are column vectors containing the row-major representations of X and B.
1436 
1437 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1438 M*/
1439 
1440 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1441 {
1442   Mat_MPIKAIJ *b;
1443   PetscMPIInt  size;
1444 
1445   PetscFunctionBegin;
1446   PetscCall(PetscNew(&b));
1447   A->data = (void *)b;
1448 
1449   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1450 
1451   b->w = NULL;
1452   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1453   if (size == 1) {
1454     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1455     A->ops->destroy             = MatDestroy_SeqKAIJ;
1456     A->ops->mult                = MatMult_SeqKAIJ;
1457     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1458     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1459     A->ops->getrow              = MatGetRow_SeqKAIJ;
1460     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1461     A->ops->sor                 = MatSOR_SeqKAIJ;
1462     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1463   } else {
1464     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1465     A->ops->destroy             = MatDestroy_MPIKAIJ;
1466     A->ops->mult                = MatMult_MPIKAIJ;
1467     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1468     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1469     A->ops->getrow              = MatGetRow_MPIKAIJ;
1470     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1471     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1472     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1473   }
1474   A->ops->setup           = MatSetUp_KAIJ;
1475   A->ops->view            = MatView_KAIJ;
1476   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479