xref: /petsc/src/mat/impls/kaij/kaij.c (revision b2ccae6bdc8edea944f1c160ca3b2eb32c69ecb2)
1 /*
2   Defines the basic matrix operations for the KAIJ  matrix storage format.
3   This format is used to evaluate matrices of the form:
4 
5     [I \otimes S + A \otimes T]
6 
7   where
8     S is a dense (p \times q) matrix
9     T is a dense (p \times q) matrix
10     A is an AIJ  (n \times n) matrix
11     I is the identity matrix
12 
13   The resulting matrix is (np \times nq)
14 
15   We provide:
16      MatMult()
17      MatMultAdd()
18      MatInvertBlockDiagonal()
19   and
20      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
21 
22   This single directory handles both the sequential and parallel codes
23 */
24 
25 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
26 #include <../src/mat/utils/freespace.h>
27 #include <petsc/private/vecimpl.h>
28 
29 /*@
30   MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
31 
32   Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
33 
34   Input Parameter:
35 . A - the `MATKAIJ` matrix
36 
37   Output Parameter:
38 . B - the `MATAIJ` matrix
39 
40   Level: advanced
41 
42   Note:
43   The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
44 
45 .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
46 @*/
47 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
48 {
49   PetscBool ismpikaij, isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 /*@C
67   MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix
68 
69   Not Collective; the entire `S` is stored and returned independently on all processes.
70 
71   Input Parameter:
72 . A - the `MATKAIJ` matrix
73 
74   Output Parameters:
75 + m - the number of rows in `S`
76 . n - the number of columns in `S`
77 - S - the S matrix, in form of a scalar array in column-major format
78 
79   Level: advanced
80 
81   Note:
82   All output parameters are optional (pass `NULL` if not desired)
83 
84 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar *S[])
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
89 
90   PetscFunctionBegin;
91   if (m) *m = b->p;
92   if (n) *n = b->q;
93   if (S) *S = b->S;
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 /*@C
98   MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix
99 
100   Not Collective; the entire `S` is stored and returned independently on all processes.
101 
102   Input Parameter:
103 . A - the `MATKAIJ` matrix
104 
105   Output Parameters:
106 + m - the number of rows in `S`
107 . n - the number of columns in `S`
108 - S - the S matrix, in form of a scalar array in column-major format
109 
110   Level: advanced
111 
112   Note:
113   All output parameters are optional (pass `NULL` if not desired)
114 
115 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116 @*/
117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *S[])
118 {
119   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
120 
121   PetscFunctionBegin;
122   if (m) *m = b->p;
123   if (n) *n = b->q;
124   if (S) *S = b->S;
125   PetscFunctionReturn(PETSC_SUCCESS);
126 }
127 
128 /*@C
129   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
130 
131   Not Collective
132 
133   Input Parameters:
134 + A - the `MATKAIJ` matrix
135 - S - location of pointer to array obtained with `MatKAIJGetS()`
136 
137   Level: advanced
138 
139   Note:
140   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
141   If `NULL` is passed, it will not attempt to zero the array pointer.
142 
143 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
144 @*/
145 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar *S[])
146 {
147   PetscFunctionBegin;
148   if (S) *S = NULL;
149   PetscCall(PetscObjectStateIncrease((PetscObject)A));
150   PetscFunctionReturn(PETSC_SUCCESS);
151 }
152 
153 /*@C
154   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
155 
156   Not Collective
157 
158   Input Parameters:
159 + A - the `MATKAIJ` matrix
160 - S - location of pointer to array obtained with `MatKAIJGetS()`
161 
162   Level: advanced
163 
164   Note:
165   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
166   If `NULL` is passed, it will not attempt to zero the array pointer.
167 
168 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
169 @*/
170 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar *S[])
171 {
172   PetscFunctionBegin;
173   if (S) *S = NULL;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 /*@C
178   MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix
179 
180   Not Collective; the entire `T` is stored and returned independently on all processes
181 
182   Input Parameter:
183 . A - the `MATKAIJ` matrix
184 
185   Output Parameters:
186 + m - the number of rows in `T`
187 . n - the number of columns in `T`
188 - T - the T matrix, in form of a scalar array in column-major format
189 
190   Level: advanced
191 
192   Note:
193   All output parameters are optional (pass `NULL` if not desired)
194 
195 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
196 @*/
197 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar *T[])
198 {
199   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
200 
201   PetscFunctionBegin;
202   if (m) *m = b->p;
203   if (n) *n = b->q;
204   if (T) *T = b->T;
205   PetscFunctionReturn(PETSC_SUCCESS);
206 }
207 
208 /*@C
209   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix
210 
211   Not Collective; the entire `T` is stored and returned independently on all processes
212 
213   Input Parameter:
214 . A - the `MATKAIJ` matrix
215 
216   Output Parameters:
217 + m - the number of rows in `T`
218 . n - the number of columns in `T`
219 - T - the T matrix, in form of a scalar array in column-major format
220 
221   Level: advanced
222 
223   Note:
224   All output parameters are optional (pass `NULL` if not desired)
225 
226 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
227 @*/
228 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *T[])
229 {
230   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
231 
232   PetscFunctionBegin;
233   if (m) *m = b->p;
234   if (n) *n = b->q;
235   if (T) *T = b->T;
236   PetscFunctionReturn(PETSC_SUCCESS);
237 }
238 
239 /*@C
240   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
241 
242   Not Collective
243 
244   Input Parameters:
245 + A - the `MATKAIJ` matrix
246 - T - location of pointer to array obtained with `MatKAIJGetS()`
247 
248   Level: advanced
249 
250   Note:
251   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
252   If `NULL` is passed, it will not attempt to zero the array pointer.
253 
254 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
255 @*/
256 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar *T[])
257 {
258   PetscFunctionBegin;
259   if (T) *T = NULL;
260   PetscCall(PetscObjectStateIncrease((PetscObject)A));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 /*@C
265   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
266 
267   Not Collective
268 
269   Input Parameters:
270 + A - the `MATKAIJ` matrix
271 - T - location of pointer to array obtained with `MatKAIJGetS()`
272 
273   Level: advanced
274 
275   Note:
276   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
277   If `NULL` is passed, it will not attempt to zero the array pointer.
278 
279 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
280 @*/
281 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar *T[])
282 {
283   PetscFunctionBegin;
284   if (T) *T = NULL;
285   PetscFunctionReturn(PETSC_SUCCESS);
286 }
287 
288 /*@
289   MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
290 
291   Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
292 
293   Input Parameters:
294 + A - the `MATKAIJ` matrix
295 - B - the `MATAIJ` matrix
296 
297   Level: advanced
298 
299   Notes:
300   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
301 
302   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
303 
304 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
305 @*/
306 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
307 {
308   PetscMPIInt size;
309   PetscBool   flg;
310 
311   PetscFunctionBegin;
312   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
313   if (size == 1) {
314     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
315     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
316     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
317     a->AIJ         = B;
318   } else {
319     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
320     a->A           = B;
321   }
322   PetscCall(PetscObjectReference((PetscObject)B));
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 /*@
327   MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix
328 
329   Logically Collective; the entire `S` is stored independently on all processes.
330 
331   Input Parameters:
332 + A - the `MATKAIJ` matrix
333 . p - the number of rows in `S`
334 . q - the number of columns in `S`
335 - S - the S matrix, in form of a scalar array in column-major format
336 
337   Level: advanced
338 
339   Notes:
340   The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.
341 
342   The `S` matrix is copied, so the user can destroy this array.
343 
344 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
345 @*/
346 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
347 {
348   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
349 
350   PetscFunctionBegin;
351   PetscCall(PetscFree(a->S));
352   if (S) {
353     PetscCall(PetscMalloc1(p * q, &a->S));
354     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
355   } else a->S = NULL;
356 
357   a->p = p;
358   a->q = q;
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 /*@
363   MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.
364 
365   Logically Collective.
366 
367   Input Parameter:
368 . A - the `MATKAIJ` matrix
369 
370   Output Parameter:
371 . identity - the Boolean value
372 
373   Level: advanced
374 
375 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
376 @*/
377 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
378 {
379   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
380   PetscInt     i, j;
381 
382   PetscFunctionBegin;
383   if (a->p != a->q) {
384     *identity = PETSC_FALSE;
385     PetscFunctionReturn(PETSC_SUCCESS);
386   } else *identity = PETSC_TRUE;
387   if (!a->isTI || a->S) {
388     for (i = 0; i < a->p && *identity; i++) {
389       for (j = 0; j < a->p && *identity; j++) {
390         if (i != j) {
391           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
392           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
393         } else {
394           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
395           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
396         }
397       }
398     }
399   }
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 /*@
404   MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix
405 
406   Logically Collective; the entire `T` is stored independently on all processes.
407 
408   Input Parameters:
409 + A - the `MATKAIJ` matrix
410 . p - the number of rows in `S`
411 . q - the number of columns in `S`
412 - T - the `T` matrix, in form of a scalar array in column-major format
413 
414   Level: advanced
415 
416   Notes:
417   The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.
418 
419   The `T` matrix is copied, so the user can destroy this array.
420 
421 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
422 @*/
423 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
424 {
425   PetscInt     i, j;
426   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
427   PetscBool    isTI = PETSC_FALSE;
428 
429   PetscFunctionBegin;
430   /* check if T is an identity matrix */
431   if (T && (p == q)) {
432     isTI = PETSC_TRUE;
433     for (i = 0; i < p; i++) {
434       for (j = 0; j < q; j++) {
435         if (i == j) {
436           /* diagonal term must be 1 */
437           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
438         } else {
439           /* off-diagonal term must be 0 */
440           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
441         }
442       }
443     }
444   }
445   a->isTI = isTI;
446 
447   PetscCall(PetscFree(a->T));
448   if (T && (!isTI)) {
449     PetscCall(PetscMalloc1(p * q, &a->T));
450     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
451   } else a->T = NULL;
452 
453   a->p = p;
454   a->q = q;
455   PetscFunctionReturn(PETSC_SUCCESS);
456 }
457 
458 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
459 {
460   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
461 
462   PetscFunctionBegin;
463   PetscCall(MatDestroy(&b->AIJ));
464   PetscCall(PetscFree(b->S));
465   PetscCall(PetscFree(b->T));
466   PetscCall(PetscFree(b->ibdiag));
467   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
468   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
469   PetscCall(PetscFree(A->data));
470   PetscFunctionReturn(PETSC_SUCCESS);
471 }
472 
473 static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
474 {
475   Mat_MPIKAIJ     *a;
476   Mat_MPIAIJ      *mpiaij;
477   PetscScalar     *T;
478   PetscInt         i, j;
479   PetscObjectState state;
480 
481   PetscFunctionBegin;
482   a      = (Mat_MPIKAIJ *)A->data;
483   mpiaij = (Mat_MPIAIJ *)a->A->data;
484 
485   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
486   if (state == a->state) {
487     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
488     PetscFunctionReturn(PETSC_SUCCESS);
489   } else {
490     PetscCall(MatDestroy(&a->AIJ));
491     PetscCall(MatDestroy(&a->OAIJ));
492     if (a->isTI) {
493       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
494        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
495        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
496        * to pass in. */
497       PetscCall(PetscMalloc1(a->p * a->q, &T));
498       for (i = 0; i < a->p; i++) {
499         for (j = 0; j < a->q; j++) {
500           if (i == j) T[i + j * a->p] = 1.0;
501           else T[i + j * a->p] = 0.0;
502         }
503       }
504     } else T = a->T;
505     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
506     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
507     if (a->isTI) PetscCall(PetscFree(T));
508     a->state = state;
509   }
510   PetscFunctionReturn(PETSC_SUCCESS);
511 }
512 
513 static PetscErrorCode MatSetUp_KAIJ(Mat A)
514 {
515   PetscInt     n;
516   PetscMPIInt  size;
517   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
518 
519   PetscFunctionBegin;
520   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
521   if (size == 1) {
522     PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
523     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
524     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
525     PetscCall(PetscLayoutSetUp(A->rmap));
526     PetscCall(PetscLayoutSetUp(A->cmap));
527   } else {
528     Mat_MPIKAIJ *a;
529     Mat_MPIAIJ  *mpiaij;
530     IS           from, to;
531     Vec          gvec;
532 
533     a      = (Mat_MPIKAIJ *)A->data;
534     mpiaij = (Mat_MPIAIJ *)a->A->data;
535     PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
536     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
537     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
538     PetscCall(PetscLayoutSetUp(A->rmap));
539     PetscCall(PetscLayoutSetUp(A->cmap));
540 
541     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
542 
543     PetscCall(VecGetSize(mpiaij->lvec, &n));
544     PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
545     PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
546     PetscCall(VecSetBlockSize(a->w, a->q));
547     PetscCall(VecSetType(a->w, VECSEQ));
548 
549     /* create two temporary Index sets for build scatter gather */
550     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
551     PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));
552 
553     /* create temporary global vector to generate scatter context */
554     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));
555 
556     /* generate the scatter context */
557     PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));
558 
559     PetscCall(ISDestroy(&from));
560     PetscCall(ISDestroy(&to));
561     PetscCall(VecDestroy(&gvec));
562   }
563 
564   A->assembled = PETSC_TRUE;
565   PetscFunctionReturn(PETSC_SUCCESS);
566 }
567 
568 static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
569 {
570   PetscViewerFormat format;
571   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
572   Mat               B;
573   PetscInt          i;
574   PetscBool         ismpikaij;
575 
576   PetscFunctionBegin;
577   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
578   PetscCall(PetscViewerGetFormat(viewer, &format));
579   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
580     PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));
581 
582     /* Print appropriate details for S. */
583     if (!a->S) {
584       PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
585     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
586       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
587       for (i = 0; i < (a->p * a->q); i++) {
588 #if defined(PETSC_USE_COMPLEX)
589         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
590 #else
591         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
592 #endif
593       }
594       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
595     }
596 
597     /* Print appropriate details for T. */
598     if (a->isTI) {
599       PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
600     } else if (!a->T) {
601       PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
602     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
603       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
604       for (i = 0; i < (a->p * a->q); i++) {
605 #if defined(PETSC_USE_COMPLEX)
606         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
607 #else
608         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
609 #endif
610       }
611       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
612     }
613 
614     /* Now print details for the AIJ matrix, using the AIJ viewer. */
615     PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
616     if (ismpikaij) {
617       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
618       PetscCall(MatView(b->A, viewer));
619     } else {
620       PetscCall(MatView(a->AIJ, viewer));
621     }
622 
623   } else {
624     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
625     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
626     PetscCall(MatView(B, viewer));
627     PetscCall(MatDestroy(&B));
628   }
629   PetscFunctionReturn(PETSC_SUCCESS);
630 }
631 
632 static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
633 {
634   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
635 
636   PetscFunctionBegin;
637   PetscCall(MatDestroy(&b->AIJ));
638   PetscCall(MatDestroy(&b->OAIJ));
639   PetscCall(MatDestroy(&b->A));
640   PetscCall(VecScatterDestroy(&b->ctx));
641   PetscCall(VecDestroy(&b->w));
642   PetscCall(PetscFree(b->S));
643   PetscCall(PetscFree(b->T));
644   PetscCall(PetscFree(b->ibdiag));
645   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
646   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
647   PetscCall(PetscFree(A->data));
648   PetscFunctionReturn(PETSC_SUCCESS);
649 }
650 
651 /* zz = yy + Axx */
652 static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
653 {
654   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
655   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
656   const PetscScalar *s = b->S, *t = b->T;
657   const PetscScalar *x, *v, *bx;
658   PetscScalar       *y, *sums;
659   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
660   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;
661 
662   PetscFunctionBegin;
663   if (!yy) {
664     PetscCall(VecSet(zz, 0.0));
665   } else {
666     PetscCall(VecCopy(yy, zz));
667   }
668   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);
669 
670   PetscCall(VecGetArrayRead(xx, &x));
671   PetscCall(VecGetArray(zz, &y));
672   idx = a->j;
673   v   = a->a;
674   ii  = a->i;
675 
676   if (b->isTI) {
677     for (i = 0; i < m; i++) {
678       jrow = ii[i];
679       n    = ii[i + 1] - jrow;
680       sums = y + p * i;
681       for (j = 0; j < n; j++) {
682         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
683       }
684     }
685     PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
686   } else if (t) {
687     for (i = 0; i < m; i++) {
688       jrow = ii[i];
689       n    = ii[i + 1] - jrow;
690       sums = y + p * i;
691       for (j = 0; j < n; j++) {
692         for (k = 0; k < p; k++) {
693           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
694         }
695       }
696     }
697     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
698      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
699      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
700      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
701     PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
702   }
703   if (s) {
704     for (i = 0; i < m; i++) {
705       sums = y + p * i;
706       bx   = x + q * i;
707       if (i < b->AIJ->cmap->n) {
708         for (j = 0; j < q; j++) {
709           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
710         }
711       }
712     }
713     PetscCall(PetscLogFlops(2.0 * m * p * q));
714   }
715 
716   PetscCall(VecRestoreArrayRead(xx, &x));
717   PetscCall(VecRestoreArray(zz, &y));
718   PetscFunctionReturn(PETSC_SUCCESS);
719 }
720 
721 static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
722 {
723   PetscFunctionBegin;
724   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
725   PetscFunctionReturn(PETSC_SUCCESS);
726 }
727 
728 #include <petsc/private/kernels/blockinvert.h>
729 
730 static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
731 {
732   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
733   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
734   const PetscScalar *S = b->S;
735   const PetscScalar *T = b->T;
736   const PetscScalar *v = a->a;
737   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
738   PetscInt           i, j, *v_pivots, dof, dof2;
739   PetscScalar       *diag, aval, *v_work;
740 
741   PetscFunctionBegin;
742   PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
743   PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");
744 
745   dof  = p;
746   dof2 = dof * dof;
747 
748   if (b->ibdiagvalid) {
749     if (values) *values = b->ibdiag;
750     PetscFunctionReturn(PETSC_SUCCESS);
751   }
752   if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
753   if (values) *values = b->ibdiag;
754   diag = b->ibdiag;
755 
756   PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
757   for (i = 0; i < m; i++) {
758     if (S) {
759       PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
760     } else {
761       PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
762     }
763     if (b->isTI) {
764       aval = 0;
765       for (j = ii[i]; j < ii[i + 1]; j++)
766         if (idx[j] == i) aval = v[j];
767       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
768     } else if (T) {
769       aval = 0;
770       for (j = ii[i]; j < ii[i + 1]; j++)
771         if (idx[j] == i) aval = v[j];
772       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
773     }
774     PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
775     diag += dof2;
776   }
777   PetscCall(PetscFree2(v_work, v_pivots));
778 
779   b->ibdiagvalid = PETSC_TRUE;
780   PetscFunctionReturn(PETSC_SUCCESS);
781 }
782 
783 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
784 {
785   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
786 
787   PetscFunctionBegin;
788   *B = kaij->AIJ;
789   PetscFunctionReturn(PETSC_SUCCESS);
790 }
791 
792 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
793 {
794   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
795   Mat            AIJ, OAIJ, B;
796   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
797   const PetscInt p = a->p, q = a->q;
798   PetscBool      ismpikaij, missing;
799 
800   PetscFunctionBegin;
801   if (reuse != MAT_REUSE_MATRIX) {
802     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
803     if (ismpikaij) {
804       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
805       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
806       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
807     } else {
808       AIJ  = a->AIJ;
809       OAIJ = NULL;
810     }
811     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
812     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
813     PetscCall(MatSetType(B, MATAIJ));
814     PetscCall(MatGetSize(AIJ, &m, NULL));
815     PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
816     if (!missing || !a->S) d = m;
817     PetscCall(PetscMalloc1(m * p, &d_nnz));
818     for (i = 0; i < m; ++i) {
819       PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
820       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
821       PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
822     }
823     if (OAIJ) {
824       PetscCall(PetscMalloc1(m * p, &o_nnz));
825       for (i = 0; i < m; ++i) {
826         PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
827         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
828         PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
829       }
830       PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
831     } else {
832       PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
833     }
834     PetscCall(PetscFree(d_nnz));
835     PetscCall(PetscFree(o_nnz));
836   } else B = *newmat;
837   PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
838   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &B));
839   else *newmat = B;
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
844 {
845   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
846   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
847   const PetscScalar *aa = a->a, *T = kaij->T, *v;
848   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
849   const PetscScalar *b, *xb, *idiag;
850   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
851   PetscInt           i, j, k, i2, bs, bs2, nz;
852 
853   PetscFunctionBegin;
854   its = its * lits;
855   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
856   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
857   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
858   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
859   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
860   bs  = p;
861   bs2 = bs * bs;
862 
863   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
864 
865   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
866   idiag = kaij->ibdiag;
867   diag  = a->diag;
868 
869   if (!kaij->sor.setup) {
870     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
871     kaij->sor.setup = PETSC_TRUE;
872   }
873   y    = kaij->sor.y;
874   w    = kaij->sor.w;
875   work = kaij->sor.work;
876   t    = kaij->sor.t;
877   arr  = kaij->sor.arr;
878 
879   PetscCall(VecGetArray(xx, &x));
880   PetscCall(VecGetArrayRead(bb, &b));
881 
882   if (flag & SOR_ZERO_INITIAL_GUESS) {
883     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
884       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
885       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
886       i2 = bs;
887       idiag += bs2;
888       for (i = 1; i < m; i++) {
889         v  = aa + ai[i];
890         vi = aj + ai[i];
891         nz = diag[i] - ai[i];
892 
893         if (T) { /* b - T (Arow * x) */
894           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
895           for (j = 0; j < nz; j++) {
896             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
897           }
898           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
899           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
900         } else if (kaij->isTI) {
901           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
902           for (j = 0; j < nz; j++) {
903             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
904           }
905         } else {
906           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
907         }
908 
909         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
910         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
911 
912         idiag += bs2;
913         i2 += bs;
914       }
915       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
916       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
917       xb = t;
918     } else xb = b;
919     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
920       idiag = kaij->ibdiag + bs2 * (m - 1);
921       i2    = bs * (m - 1);
922       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
923       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
924       i2 -= bs;
925       idiag -= bs2;
926       for (i = m - 2; i >= 0; i--) {
927         v  = aa + diag[i] + 1;
928         vi = aj + diag[i] + 1;
929         nz = ai[i + 1] - diag[i] - 1;
930 
931         if (T) { /* FIXME: This branch untested */
932           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
933           /* copy all rows of x that are needed into contiguous space */
934           workt = work;
935           for (j = 0; j < nz; j++) {
936             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
937             workt += bs;
938           }
939           arrt = arr;
940           for (j = 0; j < nz; j++) {
941             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
942             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
943             arrt += bs2;
944           }
945           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
946         } else if (kaij->isTI) {
947           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
948           for (j = 0; j < nz; j++) {
949             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
950           }
951         }
952 
953         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
954         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
955 
956         idiag -= bs2;
957         i2 -= bs;
958       }
959       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
960     }
961     its--;
962   }
963   while (its--) { /* FIXME: This branch not updated */
964     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
965       i2    = 0;
966       idiag = kaij->ibdiag;
967       for (i = 0; i < m; i++) {
968         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
969 
970         v     = aa + ai[i];
971         vi    = aj + ai[i];
972         nz    = diag[i] - ai[i];
973         workt = work;
974         for (j = 0; j < nz; j++) {
975           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
976           workt += bs;
977         }
978         arrt = arr;
979         if (T) {
980           for (j = 0; j < nz; j++) {
981             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
982             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
983             arrt += bs2;
984           }
985           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
986         } else if (kaij->isTI) {
987           for (j = 0; j < nz; j++) {
988             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
989             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
990             arrt += bs2;
991           }
992           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
993         }
994         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
995 
996         v     = aa + diag[i] + 1;
997         vi    = aj + diag[i] + 1;
998         nz    = ai[i + 1] - diag[i] - 1;
999         workt = work;
1000         for (j = 0; j < nz; j++) {
1001           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1002           workt += bs;
1003         }
1004         arrt = arr;
1005         if (T) {
1006           for (j = 0; j < nz; j++) {
1007             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1008             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1009             arrt += bs2;
1010           }
1011           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1012         } else if (kaij->isTI) {
1013           for (j = 0; j < nz; j++) {
1014             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1015             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1016             arrt += bs2;
1017           }
1018           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1019         }
1020 
1021         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1022         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1023 
1024         idiag += bs2;
1025         i2 += bs;
1026       }
1027       xb = t;
1028     } else xb = b;
1029     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1030       idiag = kaij->ibdiag + bs2 * (m - 1);
1031       i2    = bs * (m - 1);
1032       if (xb == b) {
1033         for (i = m - 1; i >= 0; i--) {
1034           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1035 
1036           v     = aa + ai[i];
1037           vi    = aj + ai[i];
1038           nz    = diag[i] - ai[i];
1039           workt = work;
1040           for (j = 0; j < nz; j++) {
1041             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1042             workt += bs;
1043           }
1044           arrt = arr;
1045           if (T) {
1046             for (j = 0; j < nz; j++) {
1047               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1048               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1049               arrt += bs2;
1050             }
1051             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1052           } else if (kaij->isTI) {
1053             for (j = 0; j < nz; j++) {
1054               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1055               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1056               arrt += bs2;
1057             }
1058             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1059           }
1060 
1061           v     = aa + diag[i] + 1;
1062           vi    = aj + diag[i] + 1;
1063           nz    = ai[i + 1] - diag[i] - 1;
1064           workt = work;
1065           for (j = 0; j < nz; j++) {
1066             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1067             workt += bs;
1068           }
1069           arrt = arr;
1070           if (T) {
1071             for (j = 0; j < nz; j++) {
1072               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1073               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1074               arrt += bs2;
1075             }
1076             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1077           } else if (kaij->isTI) {
1078             for (j = 0; j < nz; j++) {
1079               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1080               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1081               arrt += bs2;
1082             }
1083             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1084           }
1085 
1086           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1087           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1088         }
1089       } else {
1090         for (i = m - 1; i >= 0; i--) {
1091           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1092           v     = aa + diag[i] + 1;
1093           vi    = aj + diag[i] + 1;
1094           nz    = ai[i + 1] - diag[i] - 1;
1095           workt = work;
1096           for (j = 0; j < nz; j++) {
1097             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1098             workt += bs;
1099           }
1100           arrt = arr;
1101           if (T) {
1102             for (j = 0; j < nz; j++) {
1103               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1104               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1105               arrt += bs2;
1106             }
1107             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1108           } else if (kaij->isTI) {
1109             for (j = 0; j < nz; j++) {
1110               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1111               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1112               arrt += bs2;
1113             }
1114             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1115           }
1116           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1117           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1118         }
1119       }
1120       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1121     }
1122   }
1123 
1124   PetscCall(VecRestoreArray(xx, &x));
1125   PetscCall(VecRestoreArrayRead(bb, &b));
1126   PetscFunctionReturn(PETSC_SUCCESS);
1127 }
1128 
1129 /*===================================================================================*/
1130 
1131 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1132 {
1133   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1134 
1135   PetscFunctionBegin;
1136   if (!yy) {
1137     PetscCall(VecSet(zz, 0.0));
1138   } else {
1139     PetscCall(VecCopy(yy, zz));
1140   }
1141   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1142   /* start the scatter */
1143   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1144   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1145   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1146   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1147   PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149 
1150 static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1151 {
1152   PetscFunctionBegin;
1153   PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1154   PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156 
1157 static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1158 {
1159   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1160 
1161   PetscFunctionBegin;
1162   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1163   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1164   PetscFunctionReturn(PETSC_SUCCESS);
1165 }
1166 
1167 static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1168 {
1169   Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
1170   PetscBool    diag = PETSC_FALSE;
1171   PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1172   PetscScalar *vaij, *v, *S = b->S, *T = b->T;
1173 
1174   PetscFunctionBegin;
1175   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1176   b->getrowactive = PETSC_TRUE;
1177   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1178 
1179   if ((!S) && (!T) && (!b->isTI)) {
1180     if (ncols) *ncols = 0;
1181     if (cols) *cols = NULL;
1182     if (values) *values = NULL;
1183     PetscFunctionReturn(PETSC_SUCCESS);
1184   }
1185 
1186   if (T || b->isTI) {
1187     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1188     c = nzaij;
1189     for (i = 0; i < nzaij; i++) {
1190       /* check if this row contains a diagonal entry */
1191       if (colsaij[i] == r) {
1192         diag = PETSC_TRUE;
1193         c    = i;
1194       }
1195     }
1196   } else nzaij = c = 0;
1197 
1198   /* calculate size of row */
1199   nz = 0;
1200   if (S) nz += q;
1201   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1202 
1203   if (cols || values) {
1204     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1205     for (i = 0; i < q; i++) {
1206       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1207       v[i] = 0.0;
1208     }
1209     if (b->isTI) {
1210       for (i = 0; i < nzaij; i++) {
1211         for (j = 0; j < q; j++) {
1212           idx[i * q + j] = colsaij[i] * q + j;
1213           v[i * q + j]   = (j == s ? vaij[i] : 0);
1214         }
1215       }
1216     } else if (T) {
1217       for (i = 0; i < nzaij; i++) {
1218         for (j = 0; j < q; j++) {
1219           idx[i * q + j] = colsaij[i] * q + j;
1220           v[i * q + j]   = vaij[i] * T[s + j * p];
1221         }
1222       }
1223     }
1224     if (S) {
1225       for (j = 0; j < q; j++) {
1226         idx[c * q + j] = r * q + j;
1227         v[c * q + j] += S[s + j * p];
1228       }
1229     }
1230   }
1231 
1232   if (ncols) *ncols = nz;
1233   if (cols) *cols = idx;
1234   if (values) *values = v;
1235   PetscFunctionReturn(PETSC_SUCCESS);
1236 }
1237 
1238 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1239 {
1240   PetscFunctionBegin;
1241   PetscCall(PetscFree2(*idx, *v));
1242   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1243   PetscFunctionReturn(PETSC_SUCCESS);
1244 }
1245 
1246 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1247 {
1248   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1249   Mat            AIJ  = b->A;
1250   PetscBool      diag = PETSC_FALSE;
1251   Mat            MatAIJ, MatOAIJ;
1252   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1253   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1254   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;
1255 
1256   PetscFunctionBegin;
1257   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1258   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1259   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1260   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1261   b->getrowactive = PETSC_TRUE;
1262   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1263   lrow = row - rstart;
1264 
1265   if ((!S) && (!T) && (!b->isTI)) {
1266     if (ncols) *ncols = 0;
1267     if (cols) *cols = NULL;
1268     if (values) *values = NULL;
1269     PetscFunctionReturn(PETSC_SUCCESS);
1270   }
1271 
1272   r = lrow / p;
1273   s = lrow % p;
1274 
1275   if (T || b->isTI) {
1276     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1277     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1278     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1279 
1280     c = ncolsaij + ncolsoaij;
1281     for (i = 0; i < ncolsaij; i++) {
1282       /* check if this row contains a diagonal entry */
1283       if (colsaij[i] == r) {
1284         diag = PETSC_TRUE;
1285         c    = i;
1286       }
1287     }
1288   } else c = 0;
1289 
1290   /* calculate size of row */
1291   nz = 0;
1292   if (S) nz += q;
1293   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1294 
1295   if (cols || values) {
1296     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1297     for (i = 0; i < q; i++) {
1298       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1299       v[i] = 0.0;
1300     }
1301     if (b->isTI) {
1302       for (i = 0; i < ncolsaij; i++) {
1303         for (j = 0; j < q; j++) {
1304           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1305           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1306         }
1307       }
1308       for (i = 0; i < ncolsoaij; i++) {
1309         for (j = 0; j < q; j++) {
1310           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1311           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1312         }
1313       }
1314     } else if (T) {
1315       for (i = 0; i < ncolsaij; i++) {
1316         for (j = 0; j < q; j++) {
1317           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1318           v[i * q + j]   = vals[i] * T[s + j * p];
1319         }
1320       }
1321       for (i = 0; i < ncolsoaij; i++) {
1322         for (j = 0; j < q; j++) {
1323           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1324           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1325         }
1326       }
1327     }
1328     if (S) {
1329       for (j = 0; j < q; j++) {
1330         idx[c * q + j] = (r + rstart / p) * q + j;
1331         v[c * q + j] += S[s + j * p];
1332       }
1333     }
1334   }
1335 
1336   if (ncols) *ncols = nz;
1337   if (cols) *cols = idx;
1338   if (values) *values = v;
1339   PetscFunctionReturn(PETSC_SUCCESS);
1340 }
1341 
1342 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1343 {
1344   PetscFunctionBegin;
1345   PetscCall(PetscFree2(*idx, *v));
1346   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1347   PetscFunctionReturn(PETSC_SUCCESS);
1348 }
1349 
1350 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1351 {
1352   Mat A;
1353 
1354   PetscFunctionBegin;
1355   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1356   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1357   PetscCall(MatDestroy(&A));
1358   PetscFunctionReturn(PETSC_SUCCESS);
1359 }
1360 
1361 /*@C
1362   MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.
1363 
1364   Collective
1365 
1366   Input Parameters:
1367 + A - the `MATAIJ` matrix
1368 . p - number of rows in `S` and `T`
1369 . q - number of columns in `S` and `T`
1370 . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1371 - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1372 
1373   Output Parameter:
1374 . kaij - the new `MATKAIJ` matrix
1375 
1376   Level: advanced
1377 
1378   Notes:
1379   The created matrix is of the following form\:
1380 .vb
1381     [I \otimes S + A \otimes T]
1382 .ve
1383   where
1384 .vb
1385   S is a dense (p \times q) matrix
1386   T is a dense (p \times q) matrix
1387   A is a `MATAIJ`  (n \times n) matrix
1388   I is the identity matrix
1389 .ve
1390   The resulting matrix is (np \times nq)
1391 
1392   `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
1393   column-major format.
1394 
1395   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1396 
1397   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1398 
1399   Developer Notes:
1400   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1401   of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1402   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1403   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1404 
1405 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1406 @*/
1407 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1408 {
1409   PetscFunctionBegin;
1410   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1411   PetscCall(MatSetType(*kaij, MATKAIJ));
1412   PetscCall(MatKAIJSetAIJ(*kaij, A));
1413   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1414   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1415   PetscCall(MatSetUp(*kaij));
1416   PetscFunctionReturn(PETSC_SUCCESS);
1417 }
1418 
1419 /*MC
1420   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1421     [I \otimes S + A \otimes T],
1422   where
1423 .vb
1424     S is a dense (p \times q) matrix,
1425     T is a dense (p \times q) matrix,
1426     A is an AIJ  (n \times n) matrix,
1427     and I is the identity matrix.
1428 .ve
1429   The resulting matrix is (np \times nq).
1430 
1431   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1432 
1433   Level: advanced
1434 
1435   Note:
1436   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1437   where x and b are column vectors containing the row-major representations of X and B.
1438 
1439 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1440 M*/
1441 
1442 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1443 {
1444   Mat_MPIKAIJ *b;
1445   PetscMPIInt  size;
1446 
1447   PetscFunctionBegin;
1448   PetscCall(PetscNew(&b));
1449   A->data = (void *)b;
1450 
1451   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1452 
1453   b->w = NULL;
1454   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1455   if (size == 1) {
1456     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1457     A->ops->destroy             = MatDestroy_SeqKAIJ;
1458     A->ops->mult                = MatMult_SeqKAIJ;
1459     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1460     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1461     A->ops->getrow              = MatGetRow_SeqKAIJ;
1462     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1463     A->ops->sor                 = MatSOR_SeqKAIJ;
1464     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1465   } else {
1466     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1467     A->ops->destroy             = MatDestroy_MPIKAIJ;
1468     A->ops->mult                = MatMult_MPIKAIJ;
1469     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1470     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1471     A->ops->getrow              = MatGetRow_MPIKAIJ;
1472     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1473     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1474     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1475   }
1476   A->ops->setup           = MatSetUp_KAIJ;
1477   A->ops->view            = MatView_KAIJ;
1478   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1479   PetscFunctionReturn(PETSC_SUCCESS);
1480 }
1481