xref: /petsc/src/mat/impls/kaij/kaij.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 /*
2   Defines the basic matrix operations for the KAIJ  matrix storage format.
3   This format is used to evaluate matrices of the form:
4 
5     [I \otimes S + A \otimes T]
6 
7   where
8     S is a dense (p \times q) matrix
9     T is a dense (p \times q) matrix
10     A is an AIJ  (n \times n) matrix
11     I is the identity matrix
12 
13   The resulting matrix is (np \times nq)
14 
15   We provide:
16      MatMult()
17      MatMultAdd()
18      MatInvertBlockDiagonal()
19   and
20      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
21 
22   This single directory handles both the sequential and parallel codes
23 */
24 
25 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
26 #include <../src/mat/utils/freespace.h>
27 #include <petsc/private/vecimpl.h>
28 
29 /*@C
30   MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
31 
32   Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
33 
34   Input Parameter:
35 . A - the `MATKAIJ` matrix
36 
37   Output Parameter:
38 . B - the `MATAIJ` matrix
39 
40   Level: advanced
41 
42   Note:
43   The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
44 
45 .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
46 @*/
47 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
48 {
49   PetscBool ismpikaij, isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
66 /*@C
67   MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix
68 
69   Not Collective; the entire `S` is stored and returned independently on all processes.
70 
71   Input Parameter:
72 . A - the `MATKAIJ` matrix
73 
74   Output Parameters:
75 + m - the number of rows in `S`
76 . n - the number of columns in `S`
77 - S - the S matrix, in form of a scalar array in column-major format
78 
79   Level: advanced
80 
81   Note:
82   All output parameters are optional (pass `NULL` if not desired)
83 
84 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
89 
90   PetscFunctionBegin;
91   if (m) *m = b->p;
92   if (n) *n = b->q;
93   if (S) *S = b->S;
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 /*@C
98   MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix
99 
100   Not Collective; the entire `S` is stored and returned independently on all processes.
101 
102   Input Parameter:
103 . A - the `MATKAIJ` matrix
104 
105   Output Parameters:
106 + m - the number of rows in `S`
107 . n - the number of columns in `S`
108 - S - the S matrix, in form of a scalar array in column-major format
109 
110   Level: advanced
111 
112   Note:
113   All output parameters are optional (pass `NULL` if not desired)
114 
115 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116 @*/
117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
118 {
119   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
120 
121   PetscFunctionBegin;
122   if (m) *m = b->p;
123   if (n) *n = b->q;
124   if (S) *S = b->S;
125   PetscFunctionReturn(PETSC_SUCCESS);
126 }
127 
128 /*@C
129   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
130 
131   Not Collective
132 
133   Input Parameters:
134 + A - the `MATKAIJ` matrix
135 - S - location of pointer to array obtained with `MatKAIJGetS()`
136 
137   Level: advanced
138 
139   Note:
140   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
141   If `NULL` is passed, it will not attempt to zero the array pointer.
142 
143 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
144 @*/
145 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
146 {
147   PetscFunctionBegin;
148   if (S) *S = NULL;
149   PetscCall(PetscObjectStateIncrease((PetscObject)A));
150   PetscFunctionReturn(PETSC_SUCCESS);
151 }
152 
153 /*@C
154   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
155 
156   Not Collective
157 
158   Input Parameters:
159 + A - the `MATKAIJ` matrix
160 - S - location of pointer to array obtained with `MatKAIJGetS()`
161 
162   Level: advanced
163 
164   Note:
165   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
166   If `NULL` is passed, it will not attempt to zero the array pointer.
167 
168 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
169 @*/
170 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
171 {
172   PetscFunctionBegin;
173   if (S) *S = NULL;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 /*@C
178   MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix
179 
180   Not Collective; the entire `T` is stored and returned independently on all processes
181 
182   Input Parameter:
183 . A - the `MATKAIJ` matrix
184 
185   Output Parameters:
186 + m - the number of rows in `T`
187 . n - the number of columns in `T`
188 - T - the T matrix, in form of a scalar array in column-major format
189 
190   Level: advanced
191 
192   Note:
193   All output parameters are optional (pass `NULL` if not desired)
194 
195 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
196 @*/
197 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
198 {
199   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
200 
201   PetscFunctionBegin;
202   if (m) *m = b->p;
203   if (n) *n = b->q;
204   if (T) *T = b->T;
205   PetscFunctionReturn(PETSC_SUCCESS);
206 }
207 
208 /*@C
209   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix
210 
211   Not Collective; the entire `T` is stored and returned independently on all processes
212 
213   Input Parameter:
214 . A - the `MATKAIJ` matrix
215 
216   Output Parameters:
217 + m - the number of rows in `T`
218 . n - the number of columns in `T`
219 - T - the T matrix, in form of a scalar array in column-major format
220 
221   Level: advanced
222 
223   Note:
224   All output parameters are optional (pass `NULL` if not desired)
225 
226 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
227 @*/
228 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
229 {
230   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
231 
232   PetscFunctionBegin;
233   if (m) *m = b->p;
234   if (n) *n = b->q;
235   if (T) *T = b->T;
236   PetscFunctionReturn(PETSC_SUCCESS);
237 }
238 
239 /*@C
240   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
241 
242   Not Collective
243 
244   Input Parameters:
245 + A - the `MATKAIJ` matrix
246 - T - location of pointer to array obtained with `MatKAIJGetS()`
247 
248   Level: advanced
249 
250   Note:
251   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
252   If `NULL` is passed, it will not attempt to zero the array pointer.
253 
254 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
255 @*/
256 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
257 {
258   PetscFunctionBegin;
259   if (T) *T = NULL;
260   PetscCall(PetscObjectStateIncrease((PetscObject)A));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 /*@C
265   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
266 
267   Not Collective
268 
269   Input Parameters:
270 + A - the `MATKAIJ` matrix
271 - T - location of pointer to array obtained with `MatKAIJGetS()`
272 
273   Level: advanced
274 
275   Note:
276   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
277   If `NULL` is passed, it will not attempt to zero the array pointer.
278 
279 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
280 @*/
281 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
282 {
283   PetscFunctionBegin;
284   if (T) *T = NULL;
285   PetscFunctionReturn(PETSC_SUCCESS);
286 }
287 
288 /*@
289   MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
290 
291   Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
292 
293   Input Parameters:
294 + A - the `MATKAIJ` matrix
295 - B - the `MATAIJ` matrix
296 
297   Level: advanced
298 
299   Notes:
300   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
301 
302   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
303 
304 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
305 @*/
306 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
307 {
308   PetscMPIInt size;
309   PetscBool   flg;
310 
311   PetscFunctionBegin;
312   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
313   if (size == 1) {
314     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
315     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
316     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
317     a->AIJ         = B;
318   } else {
319     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
320     a->A           = B;
321   }
322   PetscCall(PetscObjectReference((PetscObject)B));
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 /*@C
327   MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix
328 
329   Logically Collective; the entire `S` is stored independently on all processes.
330 
331   Input Parameters:
332 + A - the `MATKAIJ` matrix
333 . p - the number of rows in `S`
334 . q - the number of columns in `S`
335 - S - the S matrix, in form of a scalar array in column-major format
336 
337   Level: advanced
338 
339   Notes:
340   The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.
341 
342   The `S` matrix is copied, so the user can destroy this array.
343 
344 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
345 @*/
346 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
347 {
348   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
349 
350   PetscFunctionBegin;
351   PetscCall(PetscFree(a->S));
352   if (S) {
353     PetscCall(PetscMalloc1(p * q, &a->S));
354     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
355   } else a->S = NULL;
356 
357   a->p = p;
358   a->q = q;
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 /*@C
363   MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.
364 
365   Logically Collective.
366 
367   Input Parameter:
368 . A - the `MATKAIJ` matrix
369 
370   Output Parameter:
371 . identity - the Boolean value
372 
373   Level: advanced
374 
375 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
376 @*/
377 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
378 {
379   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
380   PetscInt     i, j;
381 
382   PetscFunctionBegin;
383   if (a->p != a->q) {
384     *identity = PETSC_FALSE;
385     PetscFunctionReturn(PETSC_SUCCESS);
386   } else *identity = PETSC_TRUE;
387   if (!a->isTI || a->S) {
388     for (i = 0; i < a->p && *identity; i++) {
389       for (j = 0; j < a->p && *identity; j++) {
390         if (i != j) {
391           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
392           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
393         } else {
394           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
395           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
396         }
397       }
398     }
399   }
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 /*@C
404   MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix
405 
406   Logically Collective; the entire `T` is stored independently on all processes.
407 
408   Input Parameters:
409 + A - the `MATKAIJ` matrix
410 . p - the number of rows in `S`
411 . q - the number of columns in `S`
412 - T - the `T` matrix, in form of a scalar array in column-major format
413 
414   Level: advanced
415 
416   Notes:
417   The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.
418 
419   The `T` matrix is copied, so the user can destroy this array.
420 
421 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
422 @*/
423 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
424 {
425   PetscInt     i, j;
426   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
427   PetscBool    isTI = PETSC_FALSE;
428 
429   PetscFunctionBegin;
430   /* check if T is an identity matrix */
431   if (T && (p == q)) {
432     isTI = PETSC_TRUE;
433     for (i = 0; i < p; i++) {
434       for (j = 0; j < q; j++) {
435         if (i == j) {
436           /* diagonal term must be 1 */
437           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
438         } else {
439           /* off-diagonal term must be 0 */
440           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
441         }
442       }
443     }
444   }
445   a->isTI = isTI;
446 
447   PetscCall(PetscFree(a->T));
448   if (T && (!isTI)) {
449     PetscCall(PetscMalloc1(p * q, &a->T));
450     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
451   } else a->T = NULL;
452 
453   a->p = p;
454   a->q = q;
455   PetscFunctionReturn(PETSC_SUCCESS);
456 }
457 
458 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
459 {
460   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
461 
462   PetscFunctionBegin;
463   PetscCall(MatDestroy(&b->AIJ));
464   PetscCall(PetscFree(b->S));
465   PetscCall(PetscFree(b->T));
466   PetscCall(PetscFree(b->ibdiag));
467   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
468   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
469   PetscCall(PetscFree(A->data));
470   PetscFunctionReturn(PETSC_SUCCESS);
471 }
472 
473 static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
474 {
475   Mat_MPIKAIJ     *a;
476   Mat_MPIAIJ      *mpiaij;
477   PetscScalar     *T;
478   PetscInt         i, j;
479   PetscObjectState state;
480 
481   PetscFunctionBegin;
482   a      = (Mat_MPIKAIJ *)A->data;
483   mpiaij = (Mat_MPIAIJ *)a->A->data;
484 
485   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
486   if (state == a->state) {
487     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
488     PetscFunctionReturn(PETSC_SUCCESS);
489   } else {
490     PetscCall(MatDestroy(&a->AIJ));
491     PetscCall(MatDestroy(&a->OAIJ));
492     if (a->isTI) {
493       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
494        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
495        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
496        * to pass in. */
497       PetscCall(PetscMalloc1(a->p * a->q, &T));
498       for (i = 0; i < a->p; i++) {
499         for (j = 0; j < a->q; j++) {
500           if (i == j) T[i + j * a->p] = 1.0;
501           else T[i + j * a->p] = 0.0;
502         }
503       }
504     } else T = a->T;
505     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
506     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
507     if (a->isTI) PetscCall(PetscFree(T));
508     a->state = state;
509   }
510   PetscFunctionReturn(PETSC_SUCCESS);
511 }
512 
513 static PetscErrorCode MatSetUp_KAIJ(Mat A)
514 {
515   PetscInt     n;
516   PetscMPIInt  size;
517   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
518 
519   PetscFunctionBegin;
520   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
521   if (size == 1) {
522     PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
523     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
524     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
525     PetscCall(PetscLayoutSetUp(A->rmap));
526     PetscCall(PetscLayoutSetUp(A->cmap));
527   } else {
528     Mat_MPIKAIJ *a;
529     Mat_MPIAIJ  *mpiaij;
530     IS           from, to;
531     Vec          gvec;
532 
533     a      = (Mat_MPIKAIJ *)A->data;
534     mpiaij = (Mat_MPIAIJ *)a->A->data;
535     PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
536     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
537     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
538     PetscCall(PetscLayoutSetUp(A->rmap));
539     PetscCall(PetscLayoutSetUp(A->cmap));
540 
541     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
542 
543     PetscCall(VecGetSize(mpiaij->lvec, &n));
544     PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
545     PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
546     PetscCall(VecSetBlockSize(a->w, a->q));
547     PetscCall(VecSetType(a->w, VECSEQ));
548 
549     /* create two temporary Index sets for build scatter gather */
550     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
551     PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));
552 
553     /* create temporary global vector to generate scatter context */
554     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));
555 
556     /* generate the scatter context */
557     PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));
558 
559     PetscCall(ISDestroy(&from));
560     PetscCall(ISDestroy(&to));
561     PetscCall(VecDestroy(&gvec));
562   }
563 
564   A->assembled = PETSC_TRUE;
565   PetscFunctionReturn(PETSC_SUCCESS);
566 }
567 
568 static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
569 {
570   PetscViewerFormat format;
571   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
572   Mat               B;
573   PetscInt          i;
574   PetscBool         ismpikaij;
575 
576   PetscFunctionBegin;
577   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
578   PetscCall(PetscViewerGetFormat(viewer, &format));
579   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
580     PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));
581 
582     /* Print appropriate details for S. */
583     if (!a->S) {
584       PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
585     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
586       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
587       for (i = 0; i < (a->p * a->q); i++) {
588 #if defined(PETSC_USE_COMPLEX)
589         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
590 #else
591         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
592 #endif
593       }
594       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
595     }
596 
597     /* Print appropriate details for T. */
598     if (a->isTI) {
599       PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
600     } else if (!a->T) {
601       PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
602     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
603       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
604       for (i = 0; i < (a->p * a->q); i++) {
605 #if defined(PETSC_USE_COMPLEX)
606         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
607 #else
608         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
609 #endif
610       }
611       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
612     }
613 
614     /* Now print details for the AIJ matrix, using the AIJ viewer. */
615     PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
616     if (ismpikaij) {
617       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
618       PetscCall(MatView(b->A, viewer));
619     } else {
620       PetscCall(MatView(a->AIJ, viewer));
621     }
622 
623   } else {
624     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
625     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
626     PetscCall(MatView(B, viewer));
627     PetscCall(MatDestroy(&B));
628   }
629   PetscFunctionReturn(PETSC_SUCCESS);
630 }
631 
632 static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
633 {
634   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
635 
636   PetscFunctionBegin;
637   PetscCall(MatDestroy(&b->AIJ));
638   PetscCall(MatDestroy(&b->OAIJ));
639   PetscCall(MatDestroy(&b->A));
640   PetscCall(VecScatterDestroy(&b->ctx));
641   PetscCall(VecDestroy(&b->w));
642   PetscCall(PetscFree(b->S));
643   PetscCall(PetscFree(b->T));
644   PetscCall(PetscFree(b->ibdiag));
645   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
646   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
647   PetscCall(PetscFree(A->data));
648   PetscFunctionReturn(PETSC_SUCCESS);
649 }
650 
651 /* zz = yy + Axx */
652 static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
653 {
654   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
655   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
656   const PetscScalar *s = b->S, *t = b->T;
657   const PetscScalar *x, *v, *bx;
658   PetscScalar       *y, *sums;
659   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
660   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;
661 
662   PetscFunctionBegin;
663   if (!yy) {
664     PetscCall(VecSet(zz, 0.0));
665   } else {
666     PetscCall(VecCopy(yy, zz));
667   }
668   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);
669 
670   PetscCall(VecGetArrayRead(xx, &x));
671   PetscCall(VecGetArray(zz, &y));
672   idx = a->j;
673   v   = a->a;
674   ii  = a->i;
675 
676   if (b->isTI) {
677     for (i = 0; i < m; i++) {
678       jrow = ii[i];
679       n    = ii[i + 1] - jrow;
680       sums = y + p * i;
681       for (j = 0; j < n; j++) {
682         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
683       }
684     }
685     PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
686   } else if (t) {
687     for (i = 0; i < m; i++) {
688       jrow = ii[i];
689       n    = ii[i + 1] - jrow;
690       sums = y + p * i;
691       for (j = 0; j < n; j++) {
692         for (k = 0; k < p; k++) {
693           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
694         }
695       }
696     }
697     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
698      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
699      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
700      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
701     PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
702   }
703   if (s) {
704     for (i = 0; i < m; i++) {
705       sums = y + p * i;
706       bx   = x + q * i;
707       if (i < b->AIJ->cmap->n) {
708         for (j = 0; j < q; j++) {
709           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
710         }
711       }
712     }
713     PetscCall(PetscLogFlops(2.0 * m * p * q));
714   }
715 
716   PetscCall(VecRestoreArrayRead(xx, &x));
717   PetscCall(VecRestoreArray(zz, &y));
718   PetscFunctionReturn(PETSC_SUCCESS);
719 }
720 
721 static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
722 {
723   PetscFunctionBegin;
724   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
725   PetscFunctionReturn(PETSC_SUCCESS);
726 }
727 
728 #include <petsc/private/kernels/blockinvert.h>
729 
730 static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
731 {
732   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
733   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
734   const PetscScalar *S = b->S;
735   const PetscScalar *T = b->T;
736   const PetscScalar *v = a->a;
737   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
738   PetscInt           i, j, *v_pivots, dof, dof2;
739   PetscScalar       *diag, aval, *v_work;
740 
741   PetscFunctionBegin;
742   PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
743   PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");
744 
745   dof  = p;
746   dof2 = dof * dof;
747 
748   if (b->ibdiagvalid) {
749     if (values) *values = b->ibdiag;
750     PetscFunctionReturn(PETSC_SUCCESS);
751   }
752   if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
753   if (values) *values = b->ibdiag;
754   diag = b->ibdiag;
755 
756   PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
757   for (i = 0; i < m; i++) {
758     if (S) {
759       PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
760     } else {
761       PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
762     }
763     if (b->isTI) {
764       aval = 0;
765       for (j = ii[i]; j < ii[i + 1]; j++)
766         if (idx[j] == i) aval = v[j];
767       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
768     } else if (T) {
769       aval = 0;
770       for (j = ii[i]; j < ii[i + 1]; j++)
771         if (idx[j] == i) aval = v[j];
772       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
773     }
774     PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
775     diag += dof2;
776   }
777   PetscCall(PetscFree2(v_work, v_pivots));
778 
779   b->ibdiagvalid = PETSC_TRUE;
780   PetscFunctionReturn(PETSC_SUCCESS);
781 }
782 
783 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
784 {
785   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
786 
787   PetscFunctionBegin;
788   *B = kaij->AIJ;
789   PetscFunctionReturn(PETSC_SUCCESS);
790 }
791 
792 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
793 {
794   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
795   Mat            AIJ, OAIJ, B;
796   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
797   const PetscInt p = a->p, q = a->q;
798   PetscBool      ismpikaij, missing;
799 
800   PetscFunctionBegin;
801   if (reuse != MAT_REUSE_MATRIX) {
802     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
803     if (ismpikaij) {
804       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
805       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
806       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
807     } else {
808       AIJ  = a->AIJ;
809       OAIJ = NULL;
810     }
811     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
812     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
813     PetscCall(MatSetType(B, MATAIJ));
814     PetscCall(MatGetSize(AIJ, &m, NULL));
815     PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
816     if (!missing || !a->S) d = m;
817     PetscCall(PetscMalloc1(m * p, &d_nnz));
818     for (i = 0; i < m; ++i) {
819       PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
820       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
821       PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
822     }
823     if (OAIJ) {
824       PetscCall(PetscMalloc1(m * p, &o_nnz));
825       for (i = 0; i < m; ++i) {
826         PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
827         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
828         PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
829       }
830       PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
831     } else {
832       PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
833     }
834     PetscCall(PetscFree(d_nnz));
835     PetscCall(PetscFree(o_nnz));
836   } else B = *newmat;
837   PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
838   if (reuse == MAT_INPLACE_MATRIX) {
839     PetscCall(MatHeaderReplace(A, &B));
840   } else *newmat = B;
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
845 {
846   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
847   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
848   const PetscScalar *aa = a->a, *T = kaij->T, *v;
849   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
850   const PetscScalar *b, *xb, *idiag;
851   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
852   PetscInt           i, j, k, i2, bs, bs2, nz;
853 
854   PetscFunctionBegin;
855   its = its * lits;
856   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
857   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
858   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
859   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
860   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
861   bs  = p;
862   bs2 = bs * bs;
863 
864   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
865 
866   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
867   idiag = kaij->ibdiag;
868   diag  = a->diag;
869 
870   if (!kaij->sor.setup) {
871     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
872     kaij->sor.setup = PETSC_TRUE;
873   }
874   y    = kaij->sor.y;
875   w    = kaij->sor.w;
876   work = kaij->sor.work;
877   t    = kaij->sor.t;
878   arr  = kaij->sor.arr;
879 
880   PetscCall(VecGetArray(xx, &x));
881   PetscCall(VecGetArrayRead(bb, &b));
882 
883   if (flag & SOR_ZERO_INITIAL_GUESS) {
884     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
885       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
886       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
887       i2 = bs;
888       idiag += bs2;
889       for (i = 1; i < m; i++) {
890         v  = aa + ai[i];
891         vi = aj + ai[i];
892         nz = diag[i] - ai[i];
893 
894         if (T) { /* b - T (Arow * x) */
895           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
896           for (j = 0; j < nz; j++) {
897             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
898           }
899           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
900           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
901         } else if (kaij->isTI) {
902           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
903           for (j = 0; j < nz; j++) {
904             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
905           }
906         } else {
907           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
908         }
909 
910         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
911         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
912 
913         idiag += bs2;
914         i2 += bs;
915       }
916       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
917       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
918       xb = t;
919     } else xb = b;
920     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
921       idiag = kaij->ibdiag + bs2 * (m - 1);
922       i2    = bs * (m - 1);
923       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
924       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
925       i2 -= bs;
926       idiag -= bs2;
927       for (i = m - 2; i >= 0; i--) {
928         v  = aa + diag[i] + 1;
929         vi = aj + diag[i] + 1;
930         nz = ai[i + 1] - diag[i] - 1;
931 
932         if (T) { /* FIXME: This branch untested */
933           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
934           /* copy all rows of x that are needed into contiguous space */
935           workt = work;
936           for (j = 0; j < nz; j++) {
937             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
938             workt += bs;
939           }
940           arrt = arr;
941           for (j = 0; j < nz; j++) {
942             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
943             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
944             arrt += bs2;
945           }
946           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
947         } else if (kaij->isTI) {
948           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
949           for (j = 0; j < nz; j++) {
950             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
951           }
952         }
953 
954         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
955         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
956 
957         idiag -= bs2;
958         i2 -= bs;
959       }
960       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
961     }
962     its--;
963   }
964   while (its--) { /* FIXME: This branch not updated */
965     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
966       i2    = 0;
967       idiag = kaij->ibdiag;
968       for (i = 0; i < m; i++) {
969         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
970 
971         v     = aa + ai[i];
972         vi    = aj + ai[i];
973         nz    = diag[i] - ai[i];
974         workt = work;
975         for (j = 0; j < nz; j++) {
976           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
977           workt += bs;
978         }
979         arrt = arr;
980         if (T) {
981           for (j = 0; j < nz; j++) {
982             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
983             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
984             arrt += bs2;
985           }
986           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
987         } else if (kaij->isTI) {
988           for (j = 0; j < nz; j++) {
989             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
990             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
991             arrt += bs2;
992           }
993           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
994         }
995         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
996 
997         v     = aa + diag[i] + 1;
998         vi    = aj + diag[i] + 1;
999         nz    = ai[i + 1] - diag[i] - 1;
1000         workt = work;
1001         for (j = 0; j < nz; j++) {
1002           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1003           workt += bs;
1004         }
1005         arrt = arr;
1006         if (T) {
1007           for (j = 0; j < nz; j++) {
1008             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1009             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1010             arrt += bs2;
1011           }
1012           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1013         } else if (kaij->isTI) {
1014           for (j = 0; j < nz; j++) {
1015             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1016             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1017             arrt += bs2;
1018           }
1019           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1020         }
1021 
1022         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1023         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1024 
1025         idiag += bs2;
1026         i2 += bs;
1027       }
1028       xb = t;
1029     } else xb = b;
1030     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1031       idiag = kaij->ibdiag + bs2 * (m - 1);
1032       i2    = bs * (m - 1);
1033       if (xb == b) {
1034         for (i = m - 1; i >= 0; i--) {
1035           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1036 
1037           v     = aa + ai[i];
1038           vi    = aj + ai[i];
1039           nz    = diag[i] - ai[i];
1040           workt = work;
1041           for (j = 0; j < nz; j++) {
1042             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1043             workt += bs;
1044           }
1045           arrt = arr;
1046           if (T) {
1047             for (j = 0; j < nz; j++) {
1048               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1049               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1050               arrt += bs2;
1051             }
1052             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1053           } else if (kaij->isTI) {
1054             for (j = 0; j < nz; j++) {
1055               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1056               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1057               arrt += bs2;
1058             }
1059             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1060           }
1061 
1062           v     = aa + diag[i] + 1;
1063           vi    = aj + diag[i] + 1;
1064           nz    = ai[i + 1] - diag[i] - 1;
1065           workt = work;
1066           for (j = 0; j < nz; j++) {
1067             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1068             workt += bs;
1069           }
1070           arrt = arr;
1071           if (T) {
1072             for (j = 0; j < nz; j++) {
1073               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1074               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1075               arrt += bs2;
1076             }
1077             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1078           } else if (kaij->isTI) {
1079             for (j = 0; j < nz; j++) {
1080               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1081               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1082               arrt += bs2;
1083             }
1084             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1085           }
1086 
1087           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1088           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1089         }
1090       } else {
1091         for (i = m - 1; i >= 0; i--) {
1092           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1093           v     = aa + diag[i] + 1;
1094           vi    = aj + diag[i] + 1;
1095           nz    = ai[i + 1] - diag[i] - 1;
1096           workt = work;
1097           for (j = 0; j < nz; j++) {
1098             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1099             workt += bs;
1100           }
1101           arrt = arr;
1102           if (T) {
1103             for (j = 0; j < nz; j++) {
1104               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1105               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1106               arrt += bs2;
1107             }
1108             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1109           } else if (kaij->isTI) {
1110             for (j = 0; j < nz; j++) {
1111               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1112               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1113               arrt += bs2;
1114             }
1115             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1116           }
1117           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1118           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1119         }
1120       }
1121       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1122     }
1123   }
1124 
1125   PetscCall(VecRestoreArray(xx, &x));
1126   PetscCall(VecRestoreArrayRead(bb, &b));
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 /*===================================================================================*/
1131 
1132 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133 {
1134   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1135 
1136   PetscFunctionBegin;
1137   if (!yy) {
1138     PetscCall(VecSet(zz, 0.0));
1139   } else {
1140     PetscCall(VecCopy(yy, zz));
1141   }
1142   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1143   /* start the scatter */
1144   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1145   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1146   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1147   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1148   PetscFunctionReturn(PETSC_SUCCESS);
1149 }
1150 
1151 static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1152 {
1153   PetscFunctionBegin;
1154   PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1155   PetscFunctionReturn(PETSC_SUCCESS);
1156 }
1157 
1158 static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1159 {
1160   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1161 
1162   PetscFunctionBegin;
1163   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1164   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1169 {
1170   Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
1171   PetscBool    diag = PETSC_FALSE;
1172   PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1173   PetscScalar *vaij, *v, *S = b->S, *T = b->T;
1174 
1175   PetscFunctionBegin;
1176   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1177   b->getrowactive = PETSC_TRUE;
1178   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1179 
1180   if ((!S) && (!T) && (!b->isTI)) {
1181     if (ncols) *ncols = 0;
1182     if (cols) *cols = NULL;
1183     if (values) *values = NULL;
1184     PetscFunctionReturn(PETSC_SUCCESS);
1185   }
1186 
1187   if (T || b->isTI) {
1188     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1189     c = nzaij;
1190     for (i = 0; i < nzaij; i++) {
1191       /* check if this row contains a diagonal entry */
1192       if (colsaij[i] == r) {
1193         diag = PETSC_TRUE;
1194         c    = i;
1195       }
1196     }
1197   } else nzaij = c = 0;
1198 
1199   /* calculate size of row */
1200   nz = 0;
1201   if (S) nz += q;
1202   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1203 
1204   if (cols || values) {
1205     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1206     for (i = 0; i < q; i++) {
1207       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1208       v[i] = 0.0;
1209     }
1210     if (b->isTI) {
1211       for (i = 0; i < nzaij; i++) {
1212         for (j = 0; j < q; j++) {
1213           idx[i * q + j] = colsaij[i] * q + j;
1214           v[i * q + j]   = (j == s ? vaij[i] : 0);
1215         }
1216       }
1217     } else if (T) {
1218       for (i = 0; i < nzaij; i++) {
1219         for (j = 0; j < q; j++) {
1220           idx[i * q + j] = colsaij[i] * q + j;
1221           v[i * q + j]   = vaij[i] * T[s + j * p];
1222         }
1223       }
1224     }
1225     if (S) {
1226       for (j = 0; j < q; j++) {
1227         idx[c * q + j] = r * q + j;
1228         v[c * q + j] += S[s + j * p];
1229       }
1230     }
1231   }
1232 
1233   if (ncols) *ncols = nz;
1234   if (cols) *cols = idx;
1235   if (values) *values = v;
1236   PetscFunctionReturn(PETSC_SUCCESS);
1237 }
1238 
1239 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1240 {
1241   PetscFunctionBegin;
1242   PetscCall(PetscFree2(*idx, *v));
1243   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1244   PetscFunctionReturn(PETSC_SUCCESS);
1245 }
1246 
1247 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1248 {
1249   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1250   Mat            AIJ  = b->A;
1251   PetscBool      diag = PETSC_FALSE;
1252   Mat            MatAIJ, MatOAIJ;
1253   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1254   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1255   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;
1256 
1257   PetscFunctionBegin;
1258   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1259   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1260   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1261   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1262   b->getrowactive = PETSC_TRUE;
1263   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1264   lrow = row - rstart;
1265 
1266   if ((!S) && (!T) && (!b->isTI)) {
1267     if (ncols) *ncols = 0;
1268     if (cols) *cols = NULL;
1269     if (values) *values = NULL;
1270     PetscFunctionReturn(PETSC_SUCCESS);
1271   }
1272 
1273   r = lrow / p;
1274   s = lrow % p;
1275 
1276   if (T || b->isTI) {
1277     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1278     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1279     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1280 
1281     c = ncolsaij + ncolsoaij;
1282     for (i = 0; i < ncolsaij; i++) {
1283       /* check if this row contains a diagonal entry */
1284       if (colsaij[i] == r) {
1285         diag = PETSC_TRUE;
1286         c    = i;
1287       }
1288     }
1289   } else c = 0;
1290 
1291   /* calculate size of row */
1292   nz = 0;
1293   if (S) nz += q;
1294   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1295 
1296   if (cols || values) {
1297     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1298     for (i = 0; i < q; i++) {
1299       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1300       v[i] = 0.0;
1301     }
1302     if (b->isTI) {
1303       for (i = 0; i < ncolsaij; i++) {
1304         for (j = 0; j < q; j++) {
1305           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1306           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1307         }
1308       }
1309       for (i = 0; i < ncolsoaij; i++) {
1310         for (j = 0; j < q; j++) {
1311           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1312           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1313         }
1314       }
1315     } else if (T) {
1316       for (i = 0; i < ncolsaij; i++) {
1317         for (j = 0; j < q; j++) {
1318           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1319           v[i * q + j]   = vals[i] * T[s + j * p];
1320         }
1321       }
1322       for (i = 0; i < ncolsoaij; i++) {
1323         for (j = 0; j < q; j++) {
1324           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1325           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1326         }
1327       }
1328     }
1329     if (S) {
1330       for (j = 0; j < q; j++) {
1331         idx[c * q + j] = (r + rstart / p) * q + j;
1332         v[c * q + j] += S[s + j * p];
1333       }
1334     }
1335   }
1336 
1337   if (ncols) *ncols = nz;
1338   if (cols) *cols = idx;
1339   if (values) *values = v;
1340   PetscFunctionReturn(PETSC_SUCCESS);
1341 }
1342 
1343 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1344 {
1345   PetscFunctionBegin;
1346   PetscCall(PetscFree2(*idx, *v));
1347   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(PETSC_SUCCESS);
1349 }
1350 
1351 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1352 {
1353   Mat A;
1354 
1355   PetscFunctionBegin;
1356   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1357   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1358   PetscCall(MatDestroy(&A));
1359   PetscFunctionReturn(PETSC_SUCCESS);
1360 }
1361 
1362 /*@C
1363   MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.
1364 
1365   Collective
1366 
1367   Input Parameters:
1368 + A - the `MATAIJ` matrix
1369 . p - number of rows in `S` and `T`
1370 . q - number of columns in `S` and `T`
1371 . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1372 - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1373 
1374   Output Parameter:
1375 . kaij - the new `MATKAIJ` matrix
1376 
1377   Level: advanced
1378 
1379   Notes:
1380   The created matrix is of the following form\:
1381 .vb
1382     [I \otimes S + A \otimes T]
1383 .ve
1384   where
1385 .vb
1386   S is a dense (p \times q) matrix
1387   T is a dense (p \times q) matrix
1388   A is a `MATAIJ`  (n \times n) matrix
1389   I is the identity matrix
1390 .ve
1391   The resulting matrix is (np \times nq)
1392 
1393   `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
1394   column-major format.
1395 
1396   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1397 
1398   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1399 
1400   Developer Notes:
1401   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1402   of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1403   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1404   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1405 
1406 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1407 @*/
1408 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1409 {
1410   PetscFunctionBegin;
1411   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1412   PetscCall(MatSetType(*kaij, MATKAIJ));
1413   PetscCall(MatKAIJSetAIJ(*kaij, A));
1414   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1415   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1416   PetscCall(MatSetUp(*kaij));
1417   PetscFunctionReturn(PETSC_SUCCESS);
1418 }
1419 
1420 /*MC
1421   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1422     [I \otimes S + A \otimes T],
1423   where
1424 .vb
1425     S is a dense (p \times q) matrix,
1426     T is a dense (p \times q) matrix,
1427     A is an AIJ  (n \times n) matrix,
1428     and I is the identity matrix.
1429 .ve
1430   The resulting matrix is (np \times nq).
1431 
1432   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1433 
1434   Level: advanced
1435 
1436   Note:
1437   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1438   where x and b are column vectors containing the row-major representations of X and B.
1439 
1440 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1441 M*/
1442 
1443 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1444 {
1445   Mat_MPIKAIJ *b;
1446   PetscMPIInt  size;
1447 
1448   PetscFunctionBegin;
1449   PetscCall(PetscNew(&b));
1450   A->data = (void *)b;
1451 
1452   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1453 
1454   b->w = NULL;
1455   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1456   if (size == 1) {
1457     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1458     A->ops->destroy             = MatDestroy_SeqKAIJ;
1459     A->ops->mult                = MatMult_SeqKAIJ;
1460     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1461     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1462     A->ops->getrow              = MatGetRow_SeqKAIJ;
1463     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1464     A->ops->sor                 = MatSOR_SeqKAIJ;
1465     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1466   } else {
1467     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1468     A->ops->destroy             = MatDestroy_MPIKAIJ;
1469     A->ops->mult                = MatMult_MPIKAIJ;
1470     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1471     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1472     A->ops->getrow              = MatGetRow_MPIKAIJ;
1473     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1474     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1475     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1476   }
1477   A->ops->setup           = MatSetUp_KAIJ;
1478   A->ops->view            = MatView_KAIJ;
1479   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1480   PetscFunctionReturn(PETSC_SUCCESS);
1481 }
1482