xref: /petsc/src/mat/impls/kaij/kaij.c (revision 27f49a208b01d2e827ab9db411a2d16003fe9262)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
32 
33    Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
34 
35    Input Parameter:
36 .  A - the `MATKAIJ` matrix
37 
38    Output Parameter:
39 .  B - the `MATAIJ` matrix
40 
41    Level: advanced
42 
43    Note:
44    The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
45 
46 .seealso: [](chapter_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
47 @*/
48 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
49 {
50   PetscBool ismpikaij, isseqkaij;
51 
52   PetscFunctionBegin;
53   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
54   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(PETSC_SUCCESS);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix
69 
70    Not Collective; the entire `S` is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the `MATKAIJ` matrix
74 
75    Output Parameters:
76 +  m - the number of rows in `S`
77 .  n - the number of columns in `S`
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Level: advanced
81 
82    Note:
83    All output parameters are optional (pass `NULL` if not desired)
84 
85 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
86 @*/
87 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
88 {
89   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
90   PetscFunctionBegin;
91   if (m) *m = b->p;
92   if (n) *n = b->q;
93   if (S) *S = b->S;
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 /*@C
98    MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix
99 
100    Not Collective; the entire `S` is stored and returned independently on all processes.
101 
102    Input Parameter:
103 .  A - the `MATKAIJ` matrix
104 
105    Output Parameters:
106 +  m - the number of rows in `S`
107 .  n - the number of columns in `S`
108 -  S - the S matrix, in form of a scalar array in column-major format
109 
110    Level: advanced
111 
112    Note:
113    All output parameters are optional (pass `NULL` if not desired)
114 
115 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116 @*/
117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
118 {
119   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
120   PetscFunctionBegin;
121   if (m) *m = b->p;
122   if (n) *n = b->q;
123   if (S) *S = b->S;
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 /*@C
128   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
129 
130   Not Collective
131 
132   Input Parameters:
133 + A - the `MATKAIJ` matrix
134 - S - location of pointer to array obtained with `MatKAIJGetS()`
135 
136   Level: advanced
137 
138   Note:
139   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
140   If `NULL` is passed, it will not attempt to zero the array pointer.
141 
142 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
143 @*/
144 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
145 {
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   PetscCall(PetscObjectStateIncrease((PetscObject)A));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
154 
155   Not Collective
156 
157   Input Parameters:
158 + A - the `MATKAIJ` matrix
159 - S - location of pointer to array obtained with `MatKAIJGetSRead()`
160 
161   Level: advanced
162 
163   Note:
164   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
165   If `NULL` is passed, it will not attempt to zero the array pointer.
166 
167 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(PETSC_SUCCESS);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix
178 
179    Not Collective; the entire `T` is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the `MATKAIJ` matrix
183 
184    Output Parameters:
185 +  m - the number of rows in `T`
186 .  n - the number of columns in `T`
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Level: advanced
190 
191    Note:
192    All output parameters are optional (pass `NULL` if not desired)
193 
194 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
195 @*/
196 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
197 {
198   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
199   PetscFunctionBegin;
200   if (m) *m = b->p;
201   if (n) *n = b->q;
202   if (T) *T = b->T;
203   PetscFunctionReturn(PETSC_SUCCESS);
204 }
205 
206 /*@C
207    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix
208 
209    Not Collective; the entire `T` is stored and returned independently on all processes
210 
211    Input Parameter:
212 .  A - the `MATKAIJ` matrix
213 
214    Output Parameters:
215 +  m - the number of rows in `T`
216 .  n - the number of columns in `T`
217 -  T - the T matrix, in form of a scalar array in column-major format
218 
219    Level: advanced
220 
221    Note:
222    All output parameters are optional (pass `NULL` if not desired)
223 
224 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
225 @*/
226 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
227 {
228   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
229   PetscFunctionBegin;
230   if (m) *m = b->p;
231   if (n) *n = b->q;
232   if (T) *T = b->T;
233   PetscFunctionReturn(PETSC_SUCCESS);
234 }
235 
236 /*@C
237   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
238 
239   Not Collective
240 
241   Input Parameters:
242 + A - the `MATKAIJ` matrix
243 - T - location of pointer to array obtained with `MatKAIJGetT()`
244 
245   Level: advanced
246 
247   Note:
248   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
249   If `NULL` is passed, it will not attempt to zero the array pointer.
250 
251 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
252 @*/
253 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
254 {
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   PetscCall(PetscObjectStateIncrease((PetscObject)A));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
263 
264   Not Collective
265 
266   Input Parameters:
267 + A - the `MATKAIJ` matrix
268 - T - location of pointer to array obtained with `MatKAIJGetTRead()`
269 
270   Level: advanced
271 
272   Note:
273   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
274   If `NULL` is passed, it will not attempt to zero the array pointer.
275 
276 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(PETSC_SUCCESS);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
287 
288    Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
289 
290    Input Parameters:
291 +  A - the `MATKAIJ` matrix
292 -  B - the `MATAIJ` matrix
293 
294    Level: advanced
295 
296    Notes:
297    This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
298 
299    Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
300 
301 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
302 @*/
303 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
304 {
305   PetscMPIInt size;
306   PetscBool   flg;
307 
308   PetscFunctionBegin;
309   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
310   if (size == 1) {
311     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
312     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
313     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
314     a->AIJ         = B;
315   } else {
316     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
317     a->A           = B;
318   }
319   PetscCall(PetscObjectReference((PetscObject)B));
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 /*@C
324    MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix
325 
326    Logically Collective; the entire `S` is stored independently on all processes.
327 
328    Input Parameters:
329 +  A - the `MATKAIJ` matrix
330 .  p - the number of rows in `S`
331 .  q - the number of columns in `S`
332 -  S - the S matrix, in form of a scalar array in column-major format
333 
334    Level: advanced
335 
336    Notes:
337    The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.
338 
339    The `S` matrix is copied, so the user can destroy this array.
340 
341 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
342 @*/
343 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
344 {
345   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
346 
347   PetscFunctionBegin;
348   PetscCall(PetscFree(a->S));
349   if (S) {
350     PetscCall(PetscMalloc1(p * q, &a->S));
351     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
352   } else a->S = NULL;
353 
354   a->p = p;
355   a->q = q;
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 /*@C
360    MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.
361 
362    Logically Collective.
363 
364    Input Parameter:
365 .  A - the `MATKAIJ` matrix
366 
367   Output Parameter:
368 .  identity - the Boolean value
369 
370    Level: Advanced
371 
372 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
373 @*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  PetscInt     i, j;

  PetscFunctionBegin;
  /* Non-square S/T can never be (scaled) identities */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  } else *identity = PETSC_TRUE;
  /* If T is the identity (isTI) and no S is stored, the answer is trivially true;
   * otherwise scan both dense blocks. A block passes when every off-diagonal entry
   * is numerically zero and every diagonal entry matches the (0,0) entry, both to
   * within PETSC_SMALL. A NULL S or T is skipped (treated as a zero block, which
   * is a scaled identity). The loops exit early once *identity becomes false. */
  if (!a->isTI || a->S) {
    for (i = 0; i < a->p && *identity; i++) {
      for (j = 0; j < a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries must vanish */
          if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* diagonal entries must all equal the first diagonal entry */
          if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
399 
400 /*@C
401    MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix
402 
403    Logically Collective; the entire `T` is stored independently on all processes.
404 
405    Input Parameters:
406 +  A - the `MATKAIJ` matrix
407 .  p - the number of rows in `T`
408 .  q - the number of columns in `T`
409 -  T - the `T` matrix, in form of a scalar array in column-major format
410 
411    Level: Advanced
412 
413    Notes:
414    The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.
415 
416    The `T` matrix is copied, so the user can destroy this array.
417 
418 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
419 @*/
420 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
421 {
422   PetscInt     i, j;
423   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
424   PetscBool    isTI = PETSC_FALSE;
425 
426   PetscFunctionBegin;
427   /* check if T is an identity matrix */
428   if (T && (p == q)) {
429     isTI = PETSC_TRUE;
430     for (i = 0; i < p; i++) {
431       for (j = 0; j < q; j++) {
432         if (i == j) {
433           /* diagonal term must be 1 */
434           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
435         } else {
436           /* off-diagonal term must be 0 */
437           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
438         }
439       }
440     }
441   }
442   a->isTI = isTI;
443 
444   PetscCall(PetscFree(a->T));
445   if (T && (!isTI)) {
446     PetscCall(PetscMalloc1(p * q, &a->T));
447     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
448   } else a->T = NULL;
449 
450   a->p = p;
451   a->q = q;
452   PetscFunctionReturn(PETSC_SUCCESS);
453 }
454 
455 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
456 {
457   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
458 
459   PetscFunctionBegin;
460   PetscCall(MatDestroy(&b->AIJ));
461   PetscCall(PetscFree(b->S));
462   PetscCall(PetscFree(b->T));
463   PetscCall(PetscFree(b->ibdiag));
464   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
465   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
466   PetscCall(PetscFree(A->data));
467   PetscFunctionReturn(PETSC_SUCCESS);
468 }
469 
/* Build (or rebuild) the sequential on-diagonal (AIJ) and off-diagonal (OAIJ) KAIJ
 * submatrices of a parallel KAIJ matrix from the diagonal/off-diagonal parts of the
 * underlying MPIAIJ matrix. The rebuild is skipped when the MPIAIJ matrix is
 * unchanged, as detected via its PetscObjectState. */
static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
{
  Mat_MPIKAIJ     *a;
  Mat_MPIAIJ      *mpiaij;
  PetscScalar     *T;
  PetscInt         i, j;
  PetscObjectState state;

  PetscFunctionBegin;
  a      = (Mat_MPIKAIJ *)A->data;
  mpiaij = (Mat_MPIAIJ *)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p * a->q, &T));
      for (i = 0; i < a->p; i++) {
        for (j = 0; j < a->q; j++) {
          if (i == j) T[i + j * a->p] = 1.0;
          else T[i + j * a->p] = 0.0;
        }
      }
    } else T = a->T;
    /* The diagonal submatrix gets both S and T; the off-diagonal submatrix gets only T (no shift). */
    PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T));
    /* Remember which state of the MPIAIJ matrix these submatrices were built from */
    a->state = state;
  }

  PetscFunctionReturn(PETSC_SUCCESS);
}
510 
/* Set up a KAIJ matrix: fix its sizes and layouts from the wrapped AIJ matrix and the
 * dense-block dimensions (p, q), and — in parallel — build the sequential submatrices
 * plus the scatter used for off-process parts of the input vector in MatMult. */
static PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt     n;
  PetscMPIInt  size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Sequential: the KAIJ matrix is (p*m) x (q*n) where the AIJ matrix is m x n */
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS           from, to;
    Vec          gvec;

    a      = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    /* Parallel: sizes scale the MPIAIJ sizes by the dense-block dimensions.
     * NOTE(review): the p/q used for block sizes come from the seqkaij cast of the
     * same A->data; this relies on the leading members of Mat_SeqKAIJ/Mat_MPIKAIJ
     * lining up — confirm against kaij.h. */
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    /* Build (or refresh) the sequential diagonal/off-diagonal KAIJ submatrices */
    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Work vector holding the q-vector for every off-process column needed locally
     * (mpiaij->lvec has one entry per such column) */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
565 
/* View a KAIJ matrix. For ASCII info/impl formats, print details of S, T, and the
 * wrapped AIJ matrix directly; for every other format, convert to AIJ and view that. */
static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      /* Only the IMPL format dumps the actual entries (column-major order) */
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      /* T is represented implicitly (a->T is NULL) when it is the identity */
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatView(B, viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
629 
630 static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
631 {
632   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
633 
634   PetscFunctionBegin;
635   PetscCall(MatDestroy(&b->AIJ));
636   PetscCall(MatDestroy(&b->OAIJ));
637   PetscCall(MatDestroy(&b->A));
638   PetscCall(VecScatterDestroy(&b->ctx));
639   PetscCall(VecDestroy(&b->w));
640   PetscCall(PetscFree(b->S));
641   PetscCall(PetscFree(b->T));
642   PetscCall(PetscFree(b->ibdiag));
643   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
644   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
645   PetscCall(PetscFree(A->data));
646   PetscFunctionReturn(PETSC_SUCCESS);
647 }
648 
649 /* zz = yy + Axx */
/* Compute zz = yy + A*xx for a sequential KAIJ matrix (zz = A*xx when yy is NULL),
 * where A = I \otimes S + AIJ \otimes T in the KAIJ representation. */
static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x, *v, *bx;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

  PetscFunctionBegin;
  /* Seed zz with yy (multiply-add) or zero (plain multiply) */
  if (!yy) {
    PetscCall(VecSet(zz, 0.0));
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  /* No S, no T, and T not the identity: the operator contributes nothing */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  /* CSR arrays of the wrapped AIJ matrix */
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  if (b->isTI) {
    /* T = identity: (AIJ \otimes I) x — each AIJ entry scales a q-vector of x
     * directly into the p-vector of row i (p == q in this case) */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i; /* output block for row i */
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
      }
    }
    PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
  } else if (t) {
    /* General T: (AIJ \otimes T) x — apply the dense p x q block T (column-major)
     * for every nonzero of the AIJ matrix */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) {
          for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
  }
  if (s) {
    /* Shift part: (I \otimes S) x — a dense p x q multiply per block row; the
     * column-count guard handles rectangular AIJ matrices */
    for (i = 0; i < m; i++) {
      sums = y + p * i;
      bx   = x + q * i;
      if (i < b->AIJ->cmap->n) {
        for (j = 0; j < q; j++) {
          for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
        }
      }
    }
    PetscCall(PetscLogFlops(2.0 * m * p * q));
  }

  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
718 
719 static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
720 {
721   PetscFunctionBegin;
722   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 #include <petsc/private/kernels/blockinvert.h>
727 
/* Invert the p x p diagonal blocks of a sequential KAIJ matrix. Block i of the
 * diagonal is S + a_ii * T (with T = I when isTI), where a_ii is the diagonal entry
 * of the wrapped AIJ matrix. Results are cached in b->ibdiag; 'values' (optional)
 * receives a pointer to that cache. */
static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt           i, j, *v_pivots, dof, dof2;
  PetscScalar       *diag, aval, *v_work;

  PetscFunctionBegin;
  PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;         /* dimension of each square diagonal block */
  dof2 = dof * dof; /* entries per block */

  /* Return the cached inverse if it is still valid */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  /* Scratch space for the in-place dense inversion kernel */
  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* Start the block from S (or zero when S is absent) */
    if (S) {
      PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
    }
    if (b->isTI) {
      /* T = identity: add a_ii to the block diagonal (a_ii = 0 if row i has no
       * stored diagonal entry) */
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      /* General T: add a_ii * T to the whole block */
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    /* Invert the assembled block in place */
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
780 
781 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
782 {
783   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
784 
785   PetscFunctionBegin;
786   *B = kaij->AIJ;
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
/* Convert a (sequential or parallel) KAIJ matrix to AIJ. Unless reusing an existing
 * matrix, this first computes per-row preallocation counts from the wrapped AIJ
 * structure and the dense-block dimensions, then fills the result via
 * MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
  Mat            AIJ, OAIJ, B;
  PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt p = a->p, q = a->q;
  PetscBool      ismpikaij, missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    /* Locate the sequential AIJ matrices underlying the diagonal (AIJ) and, in
     * parallel, the off-diagonal (OAIJ) parts */
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
      OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ  = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));
    PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
    /* d = first row with a missing diagonal; rows i >= d need q extra slots for the
     * diagonal block contributed by S. With no missing diagonal or no S, set d = m
     * so no row gets the extra slots. */
    if (!missing || !a->S) d = m;
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      /* Each AIJ nonzero expands to a q-wide dense block in every one of the p rows */
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      /* Off-diagonal preallocation (parallel only) */
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  /* Copy the values; MAT_REUSE_MATRIX here because B already has its preallocation */
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
841 
/*
  SOR (Gauss-Seidel) relaxation for MATSEQKAIJ.

  The KAIJ matrix is [I \otimes S + A \otimes T]; relaxation proceeds block-row
  by block-row using the precomputed inverses of the (p x p) diagonal blocks
  stored in kaij->ibdiag. Only square dense blocks (p == q) are supported, and
  Eisenstat, nonzero fshift, and the apply-upper/lower variants are rejected.

  Three coupling cases are distinguished throughout:
    - T != NULL     : general dense T, off-diagonal contribution is T*(Arow*x)
    - kaij->isTI    : T is the identity, so the contribution reduces to scaling
                      each bs-vector by the scalar AIJ entry
    - otherwise     : T is the zero matrix, off-diagonal coupling is skipped
*/
static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data; /* CSR data of the blockwise-action matrix A */
  const PetscScalar *aa = a->a, *T = kaij->T, *v;
  const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
  const PetscScalar *b, *xb, *idiag;
  PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
  PetscInt           i, j, k, i2, bs, bs2, nz;

  PetscFunctionBegin;
  its = its * lits; /* total number of sweeps */
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
  PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
  PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
  bs  = p;       /* block size */
  bs2 = bs * bs; /* scalars per dense block */

  if (!m) PetscFunctionReturn(PETSC_SUCCESS); /* no local rows, nothing to do */

  /* rebuild the inverted block diagonal lazily if it has been invalidated */
  if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
  idiag = kaij->ibdiag;
  diag  = a->diag; /* index of the diagonal entry in each AIJ row */

  /* allocate the persistent SOR work space once; reused on every call */
  if (!kaij->sor.setup) {
    PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
    kaij->sor.setup = PETSC_TRUE;
  }
  y    = kaij->sor.y;    /* bs-vector: D^{-1} applied result */
  w    = kaij->sor.w;    /* bs-vector: running right-hand side for one block row */
  work = kaij->sor.work; /* gathered x entries, contiguous per AIJ nonzero */
  t    = kaij->sor.t;    /* saved forward-sweep RHS, reused by the backward sweep */
  arr  = kaij->sor.arr;  /* expanded v[j]*T blocks for the fused kernel call */

  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));

  if (flag & SOR_ZERO_INITIAL_GUESS) {
    /* forward sweep with x == 0 initially: only the strictly-lower part matters */
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
      PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
      i2 = bs;
      idiag += bs2;
      for (i = 1; i < m; i++) {
        v  = aa + ai[i];
        vi = aj + ai[i];
        nz = diag[i] - ai[i]; /* number of entries strictly below the diagonal in AIJ row i */

        if (T) { /* b - T (Arow * x) */
          PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
          for (j = 0; j < nz; j++) {
            for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
          for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
        } else if (kaij->isTI) { /* T == I: scale gathered x entries by the scalar AIJ values */
          PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
          for (j = 0; j < nz; j++) {
            for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
          }
        } else { /* T == 0: no off-diagonal coupling, RHS is just b */
          PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
        }

        PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); /* y <- D_i^{-1} * t_i */
        for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];

        idiag += bs2;
        i2 += bs;
      }
      /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
      PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
      xb = t; /* backward sweep can reuse the RHS accumulated above */
    } else xb = b;
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      /* backward sweep: walk block rows from the bottom, using the strictly-upper part */
      idiag = kaij->ibdiag + bs2 * (m - 1);
      i2    = bs * (m - 1);
      PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
      PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); /* last block row: x <- D^{-1} xb */
      i2 -= bs;
      idiag -= bs2;
      for (i = m - 2; i >= 0; i--) {
        v  = aa + diag[i] + 1; /* first entry strictly above the diagonal */
        vi = aj + diag[i] + 1;
        nz = ai[i + 1] - diag[i] - 1;

        if (T) { /* FIXME: This branch untested */
          PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
          /* copy all rows of x that are needed into contiguous space */
          workt = work;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
            workt += bs;
          }
          /* expand each scalar AIJ entry into the dense block v[j]*T */
          arrt = arr;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
            for (k = 0; k < bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        } else if (kaij->isTI) {
          PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
          for (j = 0; j < nz; j++) {
            for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
          }
        }
        /* NOTE(review): when T == NULL and !isTI, w still holds the previous
           block row's data here — presumably acceptable because that case has
           no off-diagonal coupling; verify against the forward sweep. */

        PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
        for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];

        idiag -= bs2;
        i2 -= bs;
      }
      PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
    }
    its--;
  }
  /* remaining sweeps start from the current x (nonzero initial guess) */
  while (its--) { /* FIXME: This branch not updated */
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      i2    = 0;
      idiag = kaij->ibdiag;
      for (i = 0; i < m; i++) {
        PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));

        /* strictly-lower part of row i */
        v     = aa + ai[i];
        vi    = aj + ai[i];
        nz    = diag[i] - ai[i];
        workt = work;
        for (j = 0; j < nz; j++) {
          PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
          workt += bs;
        }
        arrt = arr;
        if (T) {
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
            for (k = 0; k < bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        } else if (kaij->isTI) {
          /* build v[j]*I blocks explicitly so the same fused kernel can be used */
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
            for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        }
        PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); /* save lower-part RHS for backward sweep */

        /* strictly-upper part of row i */
        v     = aa + diag[i] + 1;
        vi    = aj + diag[i] + 1;
        nz    = ai[i + 1] - diag[i] - 1;
        workt = work;
        for (j = 0; j < nz; j++) {
          PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
          workt += bs;
        }
        arrt = arr;
        if (T) {
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
            for (k = 0; k < bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        } else if (kaij->isTI) {
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
            for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
        }

        PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
        /* SOR update: x <- (1-omega) x + omega D^{-1} w */
        for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);

        idiag += bs2;
        i2 += bs;
      }
      xb = t;
    } else xb = b;
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      idiag = kaij->ibdiag + bs2 * (m - 1);
      i2    = bs * (m - 1);
      if (xb == b) {
        /* no preceding forward sweep: must apply both lower and upper parts */
        for (i = m - 1; i >= 0; i--) {
          PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));

          v     = aa + ai[i];
          vi    = aj + ai[i];
          nz    = diag[i] - ai[i];
          workt = work;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          } else if (kaij->isTI) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          }

          v     = aa + diag[i] + 1;
          vi    = aj + diag[i] + 1;
          nz    = ai[i + 1] - diag[i] - 1;
          workt = work;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          } else if (kaij->isTI) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          }

          PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
          for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
        }
        /* NOTE(review): idiag/i2 are not decremented in this loop — looks like a
           latent bug in the untested xb == b path; confirm against the else branch. */
      } else {
        /* forward sweep already folded in the lower part (saved in t) */
        for (i = m - 1; i >= 0; i--) {
          PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
          v     = aa + diag[i] + 1;
          vi    = aj + diag[i] + 1;
          nz    = ai[i + 1] - diag[i] - 1;
          workt = work;
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          } else if (kaij->isTI) {
            for (j = 0; j < nz; j++) {
              PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          }
          PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
          for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
        }
        /* NOTE(review): idiag/i2 are likewise never decremented here — same
           suspicion as above; flagged only, since this FIXME branch is untested. */
      }
      PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
    }
  }

  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1127 
1128 /*===================================================================================*/
1129 
1130 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1131 {
1132   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1133 
1134   PetscFunctionBegin;
1135   if (!yy) {
1136     PetscCall(VecSet(zz, 0.0));
1137   } else {
1138     PetscCall(VecCopy(yy, zz));
1139   }
1140   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1141   /* start the scatter */
1142   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1143   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1144   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1145   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1146   PetscFunctionReturn(PETSC_SUCCESS);
1147 }
1148 
/* yy <- [I \otimes S + A \otimes T] xx, implemented by delegating to MatMultAdd_MPIKAIJ() */
static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy)); /* NULL addend => yy is zeroed before the product */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1155 
1156 static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1157 {
1158   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1159 
1160   PetscFunctionBegin;
1161   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1162   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1163   PetscFunctionReturn(PETSC_SUCCESS);
1164 }
1165 
/*
  MatGetRow for MATSEQKAIJ: assembles global row `row` of [I \otimes S + A \otimes T]
  on the fly. Row `row` maps to AIJ row r = row/p and dense-block row s = row%p;
  each AIJ nonzero expands into q column entries. The S block is added into the
  diagonal position, which may coincide with an existing T-expansion entry (the
  `diag`/`c` bookkeeping below merges the two so the entry is not duplicated).
  S and T are indexed column-major (T[s + j*p]), matching MatCreateKAIJ().
  The returned idx/v arrays come from one PetscMalloc2() and are released in
  MatRestoreRow_SeqKAIJ().
*/
static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
{
  Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
  PetscBool    diag = PETSC_FALSE;
  PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
  PetscScalar *vaij, *v, *S = b->S, *T = b->T;

  PetscFunctionBegin;
  PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);

  /* S == T == 0 (and T is not the implicit identity): the row is empty */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (T || b->isTI) {
    PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
    c = nzaij; /* default: S goes after all T entries (no diagonal AIJ entry) */
    for (i = 0; i < nzaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c    = i; /* S will be merged into the i-th expanded block */
      }
    }
  } else nzaij = c = 0;

  /* calculate size of row */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); /* avoid double-counting the merged diagonal block */

  if (cols || values) {
    PetscCall(PetscMalloc2(nz, &idx, nz, &v)); /* freed as a pair in MatRestoreRow_SeqKAIJ() */
    for (i = 0; i < q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) { /* T is the identity: entry is vaij[i] on component s, zero elsewhere */
      for (i = 0; i < nzaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = colsaij[i] * q + j;
          v[i * q + j]   = (j == s ? vaij[i] : 0);
        }
      }
    } else if (T) { /* general dense T: scale row s of T by each AIJ entry */
      for (i = 0; i < nzaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = colsaij[i] * q + j;
          v[i * q + j]   = vaij[i] * T[s + j * p]; /* T stored column-major */
        }
      }
    }
    if (S) {
      /* add (not overwrite) S into block c, merging with a diagonal T entry if present */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = r * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1236 
1237 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1238 {
1239   PetscFunctionBegin;
1240   if (nz) *nz = 0;
1241   PetscCall(PetscFree2(*idx, *v));
1242   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1243   PetscFunctionReturn(PETSC_SUCCESS);
1244 }
1245 
/*
  MatGetRow for MATMPIKAIJ: assembles global row `row` (which must be owned by
  this process) of [I \otimes S + A \otimes T]. The expansion mirrors
  MatGetRow_SeqKAIJ(), except that both the diagonal (AIJ) and off-diagonal
  (OAIJ) local parts contribute, and off-diagonal column indices are mapped to
  global ones through `garray`. The S block is merged into the diagonal entry
  at block position c when the AIJ part holds a diagonal nonzero.
  The returned idx/v arrays come from one PetscMalloc2() and are released in
  MatRestoreRow_MPIKAIJ().
*/
static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
{
  Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
  Mat            AIJ  = b->A;
  PetscBool      diag = PETSC_FALSE;
  Mat            MatAIJ, MatOAIJ;
  const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
  PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
  PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;   /* local diagonal AIJ part */
  MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;  /* local off-diagonal AIJ part */
  PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* S == T == 0 (and T is not the implicit identity): the row is empty */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  r = lrow / p; /* local AIJ row */
  s = lrow % p; /* row within the dense block */

  if (T || b->isTI) {
    PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); /* local -> global column map for OAIJ */
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));

    c = ncolsaij + ncolsoaij; /* default: S appended after all T entries */
    for (i = 0; i < ncolsaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c    = i; /* S merges into the i-th expanded block */
      }
    }
  } else c = 0; /* ncolsaij/ncolsoaij stay 0 from their initializers */

  /* calculate size of row */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz, &idx, nz, &v)); /* freed as a pair in MatRestoreRow_MPIKAIJ() */
    for (i = 0; i < q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) { /* T is the identity: entry is the AIJ value on component s, zero elsewhere */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; /* shift local diagonal columns to global */
          v[i * q + j]   = (j == s ? vals[i] : 0.0);
        }
      }
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; /* off-diagonal columns via garray */
          v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
        }
      }
    } else if (T) { /* general dense T: scale row s of T (column-major) by each AIJ value */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
          v[i * q + j]   = vals[i] * T[s + j * p];
        }
      }
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
          v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
        }
      }
    }
    if (S) {
      /* add S into block c, merging with the diagonal T entry if one exists */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = (r + rstart / p) * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1341 
1342 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1343 {
1344   PetscFunctionBegin;
1345   PetscCall(PetscFree2(*idx, *v));
1346   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1347   PetscFunctionReturn(PETSC_SUCCESS);
1348 }
1349 
1350 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1351 {
1352   Mat A;
1353 
1354   PetscFunctionBegin;
1355   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1356   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1357   PetscCall(MatDestroy(&A));
1358   PetscFunctionReturn(PETSC_SUCCESS);
1359 }
1360 
1361 /*@C
1362   MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form
1363 .vb
1364     [I \otimes S + A \otimes T]
1365 .ve
1366   where
1367 .vb
1368     S is a dense (p \times q) matrix
1369     T is a dense (p \times q) matrix
1370     A is a `MATAIJ`  (n \times n) matrix
1371     I is the identity matrix
1372 .ve
1373   The resulting matrix is (np \times nq)
1374 
1375   `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1376 
1377   Collective
1378 
1379   Input Parameters:
1380 + A - the `MATAIJ` matrix
1381 . p - number of rows in `S` and `T`
1382 . q - number of columns in `S` and `T`
1383 . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1384 - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1385 
1386   Output Parameter:
1387 . kaij - the new `MATKAIJ` matrix
1388 
1389   Level: advanced
1390 
1391   Notes:
1392   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1393 
1394   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1395 
1396   Developer Note:
1397   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1398   of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1399   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1400   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1401 
1402 .seealso: [](chapter_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1403 @*/
PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
{
  PetscFunctionBegin;
  /* create on A's communicator so the KAIJ matrix inherits A's parallel layout */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
  PetscCall(MatSetType(*kaij, MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij, A)); /* takes a reference on A; caller may destroy A */
  PetscCall(MatKAIJSetS(*kaij, p, q, S)); /* S/T may be NULL; copied column-major */
  PetscCall(MatKAIJSetT(*kaij, p, q, T));
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1415 
1416 /*MC
1417   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1418     [I \otimes S + A \otimes T],
1419   where
1420 .vb
1421     S is a dense (p \times q) matrix,
1422     T is a dense (p \times q) matrix,
1423     A is an AIJ  (n \times n) matrix,
1424     and I is the identity matrix.
1425 .ve
1426   The resulting matrix is (np \times nq).
1427 
1428   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1429 
1430   Level: advanced
1431 
1432   Note:
1433   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1434   where x and b are column vectors containing the row-major representations of X and B.
1435 
1436 .seealso: [](chapter_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1437 M*/
1438 
/*
  Constructor registered for MATKAIJ: allocates the (shared) data struct and
  wires the operations table. The concrete type becomes MATSEQKAIJ on one
  process and MATMPIKAIJ otherwise; a Mat_MPIKAIJ is allocated in both cases
  (the sequential code uses only the leading Mat_SeqKAIJ-compatible access
  through its own cast of A->data).
*/
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  /* start from an empty ops table; only the ops set below are supported */
  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  b->w = NULL; /* scatter work vector, created lazily for the parallel case */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* sequential flavor */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
  } else {
    /* parallel flavor (note: no SOR implementation is registered here) */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
  }
  /* ops shared by both flavors */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1478