xref: /petsc/src/mat/impls/kaij/kaij.c (revision 89669be4d29968dc8d4c19ce1b69194a6a561ea4)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: `MatCreateKAIJ()`
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscBool      ismpikaij,isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(0);
64 }
65 
66 /*@C
67    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
68 
69    Not Collective; the entire S is stored and returned independently on all processes.
70 
71    Input Parameter:
72 .  A - the KAIJ matrix
73 
74    Output Parameters:
75 +  m - the number of rows in S
76 .  n - the number of columns in S
77 -  S - the S matrix, in form of a scalar array in column-major format
78 
79    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
80 
81    Level: advanced
82 
83 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
84 @*/
85 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
86 {
87   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
88   PetscFunctionBegin;
89   if (m) *m = b->p;
90   if (n) *n = b->q;
91   if (S) *S = b->S;
92   PetscFunctionReturn(0);
93 }
94 
95 /*@C
96    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
97 
98    Not Collective; the entire S is stored and returned independently on all processes.
99 
100    Input Parameter:
101 .  A - the KAIJ matrix
102 
103    Output Parameters:
104 +  m - the number of rows in S
105 .  n - the number of columns in S
106 -  S - the S matrix, in form of a scalar array in column-major format
107 
108    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
109 
110    Level: advanced
111 
112 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
113 @*/
114 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
115 {
116   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
117   PetscFunctionBegin;
118   if (m) *m = b->p;
119   if (n) *n = b->q;
120   if (S) *S = b->S;
121   PetscFunctionReturn(0);
122 }
123 
124 /*@C
125   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
126 
127   Not collective
128 
129   Input Parameter:
130 . A - the KAIJ matrix
131 
132   Output Parameter:
133 . S - location of pointer to array obtained with MatKAIJGetS()
134 
135   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
136   If NULL is passed, it will not attempt to zero the array pointer.
137 
138   Level: advanced
139 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
140 @*/
141 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
142 {
143   PetscFunctionBegin;
144   if (S) *S = NULL;
145   PetscCall(PetscObjectStateIncrease((PetscObject)A));
146   PetscFunctionReturn(0);
147 }
148 
149 /*@C
150   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
151 
152   Not collective
153 
154   Input Parameter:
155 . A - the KAIJ matrix
156 
157   Output Parameter:
. S - location of pointer to array obtained with MatKAIJGetSRead()
159 
160   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
161   If NULL is passed, it will not attempt to zero the array pointer.
162 
163   Level: advanced
164 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
165 @*/
166 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
167 {
168   PetscFunctionBegin;
169   if (S) *S = NULL;
170   PetscFunctionReturn(0);
171 }
172 
173 /*@C
174    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
175 
176    Not Collective; the entire T is stored and returned independently on all processes
177 
178    Input Parameter:
179 .  A - the KAIJ matrix
180 
181    Output Parameters:
182 +  m - the number of rows in T
183 .  n - the number of columns in T
184 -  T - the T matrix, in form of a scalar array in column-major format
185 
186    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
187 
188    Level: advanced
189 
190 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
191 @*/
192 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
193 {
194   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
195   PetscFunctionBegin;
196   if (m) *m = b->p;
197   if (n) *n = b->q;
198   if (T) *T = b->T;
199   PetscFunctionReturn(0);
200 }
201 
202 /*@C
203    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
204 
205    Not Collective; the entire T is stored and returned independently on all processes
206 
207    Input Parameter:
208 .  A - the KAIJ matrix
209 
210    Output Parameters:
211 +  m - the number of rows in T
212 .  n - the number of columns in T
213 -  T - the T matrix, in form of a scalar array in column-major format
214 
215    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
216 
217    Level: advanced
218 
219 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
220 @*/
221 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
222 {
223   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
224   PetscFunctionBegin;
225   if (m) *m = b->p;
226   if (n) *n = b->q;
227   if (T) *T = b->T;
228   PetscFunctionReturn(0);
229 }
230 
231 /*@C
232   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
233 
234   Not collective
235 
236   Input Parameter:
237 . A - the KAIJ matrix
238 
239   Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetT()
241 
242   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
243   If NULL is passed, it will not attempt to zero the array pointer.
244 
245   Level: advanced
246 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
247 @*/
248 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
249 {
250   PetscFunctionBegin;
251   if (T) *T = NULL;
252   PetscCall(PetscObjectStateIncrease((PetscObject)A));
253   PetscFunctionReturn(0);
254 }
255 
256 /*@C
257   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
258 
259   Not collective
260 
261   Input Parameter:
262 . A - the KAIJ matrix
263 
264   Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetTRead()
266 
267   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
268   If NULL is passed, it will not attempt to zero the array pointer.
269 
270   Level: advanced
271 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
272 @*/
273 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
274 {
275   PetscFunctionBegin;
276   if (T) *T = NULL;
277   PetscFunctionReturn(0);
278 }
279 
280 /*@
281    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
282 
283    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
284 
285    Input Parameters:
286 +  A - the KAIJ matrix
287 -  B - the AIJ matrix
288 
289    Notes:
290    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
291    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
292 
293    Level: advanced
294 
295 .seealso: `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
296 @*/
297 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
298 {
299   PetscMPIInt    size;
300   PetscBool      flg;
301 
302   PetscFunctionBegin;
303   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
304   if (size == 1) {
305     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg));
306     PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name);
307     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
308     a->AIJ = B;
309   } else {
310     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
311     a->A = B;
312   }
313   PetscCall(PetscObjectReference((PetscObject)B));
314   PetscFunctionReturn(0);
315 }
316 
317 /*@C
318    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
319 
320    Logically Collective; the entire S is stored independently on all processes.
321 
322    Input Parameters:
323 +  A - the KAIJ matrix
324 .  p - the number of rows in S
325 .  q - the number of columns in S
326 -  S - the S matrix, in form of a scalar array in column-major format
327 
328    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
329    The S matrix is copied, so the user can destroy this array.
330 
331    Level: Advanced
332 
333 .seealso: `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
334 @*/
335 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
336 {
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   PetscCall(PetscFree(a->S));
341   if (S) {
342     PetscCall(PetscMalloc1(p*q,&a->S));
343     PetscCall(PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar)));
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
353 
354    Logically Collective.
355 
356    Input Parameter:
357 .  A - the KAIJ matrix
358 
359   Output Parameter:
360 .  identity - the Boolean value
361 
362    Level: Advanced
363 
364 .seealso: `MatKAIJGetS()`, `MatKAIJGetT()`
365 @*/
/* Sets *identity to PETSC_TRUE iff S and T are square (p == q) and each stored
   matrix is a scalar multiple of the identity: off-diagonal entries vanish and
   every diagonal entry matches the first, all to within PETSC_SMALL.
   A NULL S or T array trivially passes its half of each check. */
PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
  PetscInt    i,j;

  PetscFunctionBegin;
  if (a->p != a->q) {
    /* Non-square blocks can never be (scaled) identities */
    *identity = PETSC_FALSE;
    PetscFunctionReturn(0);
  } else *identity = PETSC_TRUE;
  /* If T is flagged as the identity and no S is stored, there is nothing to scan */
  if (!a->isTI || a->S) {
    for (i=0; i<a->p && *identity; i++) {
      for (j=0; j<a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries must be (numerically) zero */
          if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* all diagonal entries must equal the first diagonal entry */
          if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
391 
392 /*@C
393    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
394 
395    Logically Collective; the entire T is stored independently on all processes.
396 
397    Input Parameters:
398 +  A - the KAIJ matrix
.  p - the number of rows in T
.  q - the number of columns in T
401 -  T - the T matrix, in form of a scalar array in column-major format
402 
403    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
404    The T matrix is copied, so the user can destroy this array.
405 
406    Level: Advanced
407 
408 .seealso: `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
409 @*/
410 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
411 {
412   PetscInt       i,j;
413   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
414   PetscBool      isTI = PETSC_FALSE;
415 
416   PetscFunctionBegin;
417   /* check if T is an identity matrix */
418   if (T && (p == q)) {
419     isTI = PETSC_TRUE;
420     for (i=0; i<p; i++) {
421       for (j=0; j<q; j++) {
422         if (i == j) {
423           /* diagonal term must be 1 */
424           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
425         } else {
426           /* off-diagonal term must be 0 */
427           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
428         }
429       }
430     }
431   }
432   a->isTI = isTI;
433 
434   PetscCall(PetscFree(a->T));
435   if (T && (!isTI)) {
436     PetscCall(PetscMalloc1(p*q,&a->T));
437     PetscCall(PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar)));
438   } else a->T = NULL;
439 
440   a->p = p;
441   a->q = q;
442   PetscFunctionReturn(0);
443 }
444 
/* Destroy the implementation data of a MATSEQKAIJ matrix: the embedded AIJ matrix,
   the S/T arrays, the cached inverted block diagonal, and the SOR work space. */
PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  /* The five SOR work arrays are allocated together with PetscMalloc5() */
  PetscCall(PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
459 
/* (Re)build the sequential KAIJ matrices a->AIJ and a->OAIJ from the diagonal (mpiaij->A)
   and off-diagonal (mpiaij->B) parts of the embedded parallel AIJ matrix. A cached
   PetscObjectState of that AIJ matrix is used to skip the rebuild when nothing changed. */
PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
{
  Mat_MPIKAIJ      *a;
  Mat_MPIAIJ       *mpiaij;
  PetscScalar      *T;
  PetscInt         i,j;
  PetscObjectState state;

  PetscFunctionBegin;
  a = (Mat_MPIKAIJ*)A->data;
  mpiaij = (Mat_MPIAIJ*)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A,&state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
    PetscFunctionReturn(0);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p*a->q,&T));
      for (i=0; i<a->p; i++) {
        for (j=0; j<a->q; j++) {
          if (i==j) T[i+j*a->p] = 1.0;
          else      T[i+j*a->p] = 0.0;
        }
      }
    } else T = a->T;
    /* The diagonal part gets both S and T; the off-diagonal part has no shift (S = NULL) */
    PetscCall(MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ));
    if (a->isTI) {
      PetscCall(PetscFree(T));
    }
    a->state = state; /* remember which state of a->A we just synchronized with */
  }

  PetscFunctionReturn(0);
}
502 
/* Set the sizes and layouts of the KAIJ matrix from the embedded AIJ matrix scaled by
   the block dimensions p (rows) and q (columns). In the parallel case this also builds
   the sequential AIJ/OAIJ pieces and the scatter used to gather the off-process entries
   of an input vector into the local work vector a->w. */
PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt       n;
  PetscMPIInt    size;
  Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* KAIJ dimensions are the AIJ dimensions multiplied by the block sizes p and q */
    PetscCall(MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS          from,to;
    Vec         gvec;

    a = (Mat_MPIKAIJ*)A->data;
    mpiaij = (Mat_MPIAIJ*)a->A->data;
    PetscCall(MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Local work vector receiving gathered ghost entries: q values per ghost column of a->A */
    PetscCall(VecGetSize(mpiaij->lvec,&n));
    PetscCall(VecCreate(PETSC_COMM_SELF,&a->w));
    PetscCall(VecSetSizes(a->w,n*a->q,n*a->q));
    PetscCall(VecSetBlockSize(a->w,a->q));
    PetscCall(VecSetType(a->w,VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec,from,a->w,to,&a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
557 
/* View a KAIJ matrix. For INFO/INFO_DETAIL/IMPL ASCII formats print the S and T
   entries (IMPL only) plus the embedded AIJ matrix; for any other format convert
   the whole matrix to AIJ and view that instead. */
PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of S are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of T are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      PetscCall(MatView(b->A,viewer));
    } else {
      PetscCall(MatView(a->AIJ,viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B));
    PetscCall(MatView(B,viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(0);
}
621 
/* Destroy the implementation data of a MATMPIKAIJ matrix: the sequential AIJ/OAIJ
   pieces, the embedded parallel AIJ matrix, the ghost-gather scatter and work vector,
   the S/T arrays, and the cached inverted block diagonal. */
PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
640 
641 /* --------------------------------------------------------------------------------------*/
642 
643 /* zz = yy + Axx */
/* Computes zz = yy + (I \otimes S + A \otimes T) xx for the sequential KAIJ matrix;
   when yy is NULL computes zz = (I \otimes S + A \otimes T) xx. Three specializations:
   T is the identity (b->isTI), general dense T, and the additive S (shift) term. */
PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x,*v,*bx;
  PetscScalar       *y,*sums;
  const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
  PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

  PetscFunctionBegin;
  /* Initialize the result: either zero or a copy of the addend */
  if (!yy) {
    PetscCall(VecSet(zz,0.0));
  } else {
    PetscCall(VecCopy(yy,zz));
  }
  /* With no S, no T, and T not the identity, the operator contributes nothing */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);

  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(VecGetArray(zz,&y));
  /* CSR arrays of the underlying AIJ matrix */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  if (b->isTI) {
    /* T == I: each AIJ entry scales a q-long slice of x directly into a p-long slice of y */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
        }
      }
    }
    PetscCall(PetscLogFlops(3.0*(a->nz)*p));
  } else if (t) {
    /* General dense T: each AIJ entry applies the p x q matrix T to a slice of x */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          for (l=0; l<q; l++) {
            sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
          }
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz));
  }
  if (s) {
    /* Add the I \otimes S (shift) contribution; guarded for rectangular A where m > cmap->n */
    for (i=0; i<m; i++) {
      sums = y + p*i;
      bx   = x + q*i;
      if (i < b->AIJ->cmap->n) {
        for (j=0; j<q; j++) {
          for (k=0; k<p; k++) {
            sums[k] += s[k+j*p]*bx[j];
          }
        }
      }
    }
    PetscCall(PetscLogFlops(2.0*m*p*q));
  }

  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecRestoreArray(zz,&y));
  PetscFunctionReturn(0);
}
718 
719 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
720 {
721   PetscFunctionBegin;
722   PetscCall(MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy));
723   PetscFunctionReturn(0);
724 }
725 
726 #include <petsc/private/kernels/blockinvert.h>
727 
/* Compute (and cache in b->ibdiag) the inverses of the p x p diagonal blocks
   S + a_ii * T of the KAIJ matrix, one per row of the embedded AIJ matrix.
   Requires square blocks (p == q) and at least one of S, T, or T == identity.
   If values is non-NULL it receives a pointer to the cached array. */
PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
{
  Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *S  = b->S;
  const PetscScalar *T  = b->T;
  const PetscScalar *v  = a->a;
  const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt          i,j,*v_pivots,dof,dof2;
  PetscScalar       *diag,aval,*v_work;

  PetscFunctionBegin;
  PetscCheck(p == q,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof*dof;

  /* Return the cached inverse when it is still valid */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(0);
  }
  if (!b->ibdiag) {
    PetscCall(PetscMalloc1(dof2*m,&b->ibdiag));
    PetscCall(PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar)));
  }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof,&v_work,dof,&v_pivots));
  for (i=0; i<m; i++) {
    /* Start the block from S (or zero), then add a_ii * T (or a_ii * I if T is the identity) */
    if (S) {
      PetscCall(PetscMemcpy(diag,S,dof2*sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag,dof2*sizeof(PetscScalar)));
    }
    if (b->isTI) {
      aval = 0;
      /* Scan row i of the AIJ matrix for its diagonal entry (0 if absent) */
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof; j++) diag[j+dof*j] += aval;
    } else if (T) {
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof2; j++) diag[j] += aval*T[j];
    }
    /* Invert the assembled dof x dof block in place */
    PetscCall(PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work,v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(0);
}
781 
782 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
783 {
784   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
785 
786   PetscFunctionBegin;
787   *B = kaij->AIJ;
788   PetscFunctionReturn(0);
789 }
790 
/* Convert a KAIJ matrix (sequential or parallel) to a plain AIJ matrix.
   Unless reusing an existing result, first computes exact per-row preallocation
   counts from the embedded AIJ/OAIJ matrices, then delegates the actual value
   transfer to MatConvert_Basic(). Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX,
   and MAT_INPLACE_MATRIX. */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
  Mat            AIJ,OAIJ,B;
  PetscInt       *d_nnz,*o_nnz = NULL,nz,i,j,m,d;
  const PetscInt p = a->p,q = a->q;
  PetscBool      ismpikaij,missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    /* Locate the underlying sequential AIJ matrices: diagonal (AIJ) and, in parallel, off-diagonal (OAIJ) */
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
    } else {
      AIJ = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,MATAIJ));
    PetscCall(MatGetSize(AIJ,&m,NULL));
    PetscCall(MatMissingDiagonal(AIJ,&missing,&d)); /* assumption that all successive rows will have a missing diagonal */
    if (!missing || !a->S) d = m;
    /* Each AIJ row expands to p rows of q-wide blocks; rows past d gain an extra diagonal block from S */
    PetscCall(PetscMalloc1(m*p,&d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
      for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
    }
    if (OAIJ) {
      PetscCall(PetscMalloc1(m*p,&o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
        for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B,0,d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  PetscCall(MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else *newmat = B;
  PetscFunctionReturn(0);
}
842 
843 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
844 {
845   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
846   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
847   const PetscScalar *aa = a->a, *T = kaij->T, *v;
848   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
849   const PetscScalar *b, *xb, *idiag;
850   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
851   PetscInt          i, j, k, i2, bs, bs2, nz;
852 
853   PetscFunctionBegin;
854   its = its*lits;
855   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
856   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
857   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
858   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
859   PetscCheck(p == q,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
860   else        {bs = p; bs2 = bs*bs; }
861 
862   if (!m) PetscFunctionReturn(0);
863 
864   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A,NULL));
865   idiag = kaij->ibdiag;
866   diag  = a->diag;
867 
868   if (!kaij->sor.setup) {
869     PetscCall(PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr));
870     kaij->sor.setup = PETSC_TRUE;
871   }
872   y     = kaij->sor.y;
873   w     = kaij->sor.w;
874   work  = kaij->sor.work;
875   t     = kaij->sor.t;
876   arr   = kaij->sor.arr;
877 
878   PetscCall( VecGetArray(xx,&x));
879   PetscCall(VecGetArrayRead(bb,&b));
880 
881   if (flag & SOR_ZERO_INITIAL_GUESS) {
882     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
883       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
884       PetscCall(PetscMemcpy(t,b,bs*sizeof(PetscScalar)));
885       i2     =  bs;
886       idiag  += bs2;
887       for (i=1; i<m; i++) {
888         v  = aa + ai[i];
889         vi = aj + ai[i];
890         nz = diag[i] - ai[i];
891 
892         if (T) {                /* b - T (Arow * x) */
893           PetscCall(PetscMemzero(w,bs*sizeof(PetscScalar)));
894           for (j=0; j<nz; j++) {
895             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
896           }
897           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
898           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
899         } else if (kaij->isTI) {
900           PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
901           for (j=0; j<nz; j++) {
902             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
903           }
904         } else {
905           PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
906         }
907 
908         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
909         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
910 
911         idiag += bs2;
912         i2    += bs;
913       }
914       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
915       PetscCall(PetscLogFlops(1.0*bs2*a->nz));
916       xb = t;
917     } else xb = b;
918     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
919       idiag = kaij->ibdiag+bs2*(m-1);
920       i2    = bs * (m-1);
921       PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
922       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
923       i2    -= bs;
924       idiag -= bs2;
925       for (i=m-2; i>=0; i--) {
926         v  = aa + diag[i] + 1 ;
927         vi = aj + diag[i] + 1;
928         nz = ai[i+1] - diag[i] - 1;
929 
930         if (T) {                /* FIXME: This branch untested */
931           PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
932           /* copy all rows of x that are needed into contiguous space */
933           workt = work;
934           for (j=0; j<nz; j++) {
935             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
936             workt += bs;
937           }
938           arrt = arr;
939           for (j=0; j<nz; j++) {
940             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
941             for (k=0; k<bs2; k++) arrt[k] *= v[j];
942             arrt += bs2;
943           }
944           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
945         } else if (kaij->isTI) {
946           PetscCall(PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar)));
947           for (j=0; j<nz; j++) {
948             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
949           }
950         }
951 
952         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
953         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
954 
955         idiag -= bs2;
956         i2    -= bs;
957       }
958       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
959     }
960     its--;
961   }
962   while (its--) {               /* FIXME: This branch not updated */
963     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
964       i2     =  0;
965       idiag  = kaij->ibdiag;
966       for (i=0; i<m; i++) {
967         PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
968 
969         v  = aa + ai[i];
970         vi = aj + ai[i];
971         nz = diag[i] - ai[i];
972         workt = work;
973         for (j=0; j<nz; j++) {
974           PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
975           workt += bs;
976         }
977         arrt = arr;
978         if (T) {
979           for (j=0; j<nz; j++) {
980             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
981             for (k=0; k<bs2; k++) arrt[k] *= v[j];
982             arrt += bs2;
983           }
984           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
985         } else if (kaij->isTI) {
986           for (j=0; j<nz; j++) {
987             PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
988             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
989             arrt += bs2;
990           }
991           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
992         }
993         PetscCall(PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar)));
994 
995         v  = aa + diag[i] + 1;
996         vi = aj + diag[i] + 1;
997         nz = ai[i+1] - diag[i] - 1;
998         workt = work;
999         for (j=0; j<nz; j++) {
1000           PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1001           workt += bs;
1002         }
1003         arrt = arr;
1004         if (T) {
1005           for (j=0; j<nz; j++) {
1006             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1007             for (k=0; k<bs2; k++) arrt[k] *= v[j];
1008             arrt += bs2;
1009           }
1010           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1011         } else if (kaij->isTI) {
1012           for (j=0; j<nz; j++) {
1013             PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1014             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1015             arrt += bs2;
1016           }
1017           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1018         }
1019 
1020         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1021         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1022 
1023         idiag += bs2;
1024         i2    += bs;
1025       }
1026       xb = t;
1027     }
1028     else xb = b;
1029     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1030       idiag = kaij->ibdiag+bs2*(m-1);
1031       i2    = bs * (m-1);
1032       if (xb == b) {
1033         for (i=m-1; i>=0; i--) {
1034           PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
1035 
1036           v  = aa + ai[i];
1037           vi = aj + ai[i];
1038           nz = diag[i] - ai[i];
1039           workt = work;
1040           for (j=0; j<nz; j++) {
1041             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1042             workt += bs;
1043           }
1044           arrt = arr;
1045           if (T) {
1046             for (j=0; j<nz; j++) {
1047               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1048               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1049               arrt += bs2;
1050             }
1051             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1052           } else if (kaij->isTI) {
1053             for (j=0; j<nz; j++) {
1054               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1055               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1056               arrt += bs2;
1057             }
1058             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1059           }
1060 
1061           v  = aa + diag[i] + 1;
1062           vi = aj + diag[i] + 1;
1063           nz = ai[i+1] - diag[i] - 1;
1064           workt = work;
1065           for (j=0; j<nz; j++) {
1066             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1067             workt += bs;
1068           }
1069           arrt = arr;
1070           if (T) {
1071             for (j=0; j<nz; j++) {
1072               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1073               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1074               arrt += bs2;
1075             }
1076             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1077           } else if (kaij->isTI) {
1078             for (j=0; j<nz; j++) {
1079               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1080               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1081               arrt += bs2;
1082             }
1083             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1084           }
1085 
1086           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1087           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1088         }
1089       } else {
1090         for (i=m-1; i>=0; i--) {
1091           PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
1092           v  = aa + diag[i] + 1;
1093           vi = aj + diag[i] + 1;
1094           nz = ai[i+1] - diag[i] - 1;
1095           workt = work;
1096           for (j=0; j<nz; j++) {
1097             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1098             workt += bs;
1099           }
1100           arrt = arr;
1101           if (T) {
1102             for (j=0; j<nz; j++) {
1103               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1104               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1105               arrt += bs2;
1106             }
1107             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1108           } else if (kaij->isTI) {
1109             for (j=0; j<nz; j++) {
1110               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1111               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1112               arrt += bs2;
1113             }
1114             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1115           }
1116           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1117           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1118         }
1119       }
1120       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
1121     }
1122   }
1123 
1124   PetscCall(VecRestoreArray(xx,&x));
1125   PetscCall(VecRestoreArrayRead(bb,&b));
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 /*===================================================================================*/
1130 
1131 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1134 
1135   PetscFunctionBegin;
1136   if (!yy) {
1137     PetscCall(VecSet(zz,0.0));
1138   } else {
1139     PetscCall(VecCopy(yy,zz));
1140   }
1141   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1142   /* start the scatter */
1143   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
1144   PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz));
1145   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
1146   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1151 {
1152   PetscFunctionBegin;
1153   PetscCall(MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy));
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1158 {
1159   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1160 
1161   PetscFunctionBegin;
1162   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1163   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values));
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 /* ----------------------------------------------------------------*/
1168 
1169 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1170 {
1171   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1172   PetscErrorCode  diag = PETSC_FALSE;
1173   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1174   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1175 
1176   PetscFunctionBegin;
1177   PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1178   b->getrowactive = PETSC_TRUE;
1179   PetscCheck(row >= 0 && row < A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row);
1180 
1181   if ((!S) && (!T) && (!b->isTI)) {
1182     if (ncols)    *ncols  = 0;
1183     if (cols)     *cols   = NULL;
1184     if (values)   *values = NULL;
1185     PetscFunctionReturn(0);
1186   }
1187 
1188   if (T || b->isTI) {
1189     PetscCall(MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij));
1190     c     = nzaij;
1191     for (i=0; i<nzaij; i++) {
1192       /* check if this row contains a diagonal entry */
1193       if (colsaij[i] == r) {
1194         diag = PETSC_TRUE;
1195         c = i;
1196       }
1197     }
1198   } else nzaij = c = 0;
1199 
1200   /* calculate size of row */
1201   nz = 0;
1202   if (S)            nz += q;
1203   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1204 
1205   if (cols || values) {
1206     PetscCall(PetscMalloc2(nz,&idx,nz,&v));
1207     for (i=0; i<q; i++) {
1208       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1209       v[i] = 0.0;
1210     }
1211     if (b->isTI) {
1212       for (i=0; i<nzaij; i++) {
1213         for (j=0; j<q; j++) {
1214           idx[i*q+j] = colsaij[i]*q+j;
1215           v[i*q+j]   = (j==s ? vaij[i] : 0);
1216         }
1217       }
1218     } else if (T) {
1219       for (i=0; i<nzaij; i++) {
1220         for (j=0; j<q; j++) {
1221           idx[i*q+j] = colsaij[i]*q+j;
1222           v[i*q+j]   = vaij[i]*T[s+j*p];
1223         }
1224       }
1225     }
1226     if (S) {
1227       for (j=0; j<q; j++) {
1228         idx[c*q+j] = r*q+j;
1229         v[c*q+j]  += S[s+j*p];
1230       }
1231     }
1232   }
1233 
1234   if (ncols)    *ncols  = nz;
1235   if (cols)     *cols   = idx;
1236   if (values)   *values = v;
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1241 {
1242   PetscFunctionBegin;
1243   if (nz) *nz = 0;
1244   PetscCall(PetscFree2(*idx,*v));
1245   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1246   PetscFunctionReturn(0);
1247 }
1248 
/*
  MatGetRow_MPIKAIJ - Expands one locally owned (global) row of the parallel KAIJ
  matrix [I \otimes S + A \otimes T] into explicit global column indices and values.

  The local KAIJ row lrow = row - rstart corresponds to row r = lrow/p of the
  underlying AIJ matrix and to row s = lrow%p of the dense blocks S and T
  (column-major).  Contributions come from both the on-diagonal (MatAIJ) and
  off-diagonal (MatOAIJ) sequential parts; off-diagonal local columns are mapped
  to global block columns through 'garray'.  The index/value arrays are
  allocated here and must be released with MatRestoreRow_MPIKAIJ().
*/
PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
{
  Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
  Mat             AIJ     = b->A;
  PetscBool       diag    = PETSC_FALSE;
  Mat             MatAIJ,MatOAIJ;
  const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
  PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
  PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  /* the inner sequential AIJ matrices of the two sequential KAIJ parts */
  MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
  MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
  PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* S == NULL, T == NULL and T not the identity: the matrix is identically zero */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols)    *ncols  = 0;
    if (cols)     *cols   = NULL;
    if (values)   *values = NULL;
    PetscFunctionReturn(0);
  }

  r = lrow/p;   /* row of the underlying AIJ matrix (local numbering) */
  s = lrow%p;   /* row within the dense p x q blocks */

  if (T || b->isTI) {
    /* garray maps local off-diagonal column indices to global block columns */
    PetscCall(MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray));
    PetscCall(MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals));

    c     = ncolsaij + ncolsoaij;  /* default: S appended after all AIJ blocks */
    for (i=0; i<ncolsaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c = i;                     /* S merges into the diagonal block instead */
      }
    }
  } else c = 0;

  /* calculate size of row: q entries per AIJ nonzero (diag + off-diag parts),
     plus q entries for S, minus q when S shares the diagonal block */
  nz = 0;
  if (S)            nz += q;
  if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz,&idx,nz,&v));
    for (i=0; i<q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* T is the identity: row s of the block picks out component s only */
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;  /* shift diag part to global block columns */
          v[i*q+j]   = (j==s ? vals[i] : 0.0);
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;  /* off-diag part: map via garray */
          v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
        }
      }
    } else if (T) {
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
          v[i*q+j]   = vals[i]*T[s+j*p];  /* T is column-major: entry (s,j) */
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
          v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
        }
      }
    }
    if (S) {
      /* add row s of S into block column c (the diagonal block when present) */
      for (j=0; j<q; j++) {
        idx[c*q+j] = (r+rstart/p)*q+j;
        v[c*q+j]  += S[s+j*p];
      }
    }
  }

  if (ncols)  *ncols  = nz;
  if (cols)   *cols   = idx;
  if (values) *values = v;
  PetscFunctionReturn(0);
}
1344 
1345 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1346 {
1347   PetscFunctionBegin;
1348   PetscCall(PetscFree2(*idx,*v));
1349   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1354 {
1355   Mat            A;
1356 
1357   PetscFunctionBegin;
1358   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
1359   PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
1360   PetscCall(MatDestroy(&A));
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 /* ---------------------------------------------------------------------------------- */
1365 /*@C
1366   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1367 
1368     [I \otimes S + A \otimes T]
1369 
1370   where
1371     S is a dense (p \times q) matrix
1372     T is a dense (p \times q) matrix
1373     A is an AIJ  (n \times n) matrix
1374     I is the identity matrix
1375   The resulting matrix is (np \times nq)
1376 
1377   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1378 
1379   Collective
1380 
1381   Input Parameters:
1382 + A - the AIJ matrix
1383 . p - number of rows in S and T
1384 . q - number of columns in S and T
. S - the S matrix (can be NULL), stored as a PetscScalar array (column-major)
- T - the T matrix (can be NULL), stored as a PetscScalar array (column-major)
1387 
1388   Output Parameter:
1389 . kaij - the new KAIJ matrix
1390 
1391   Notes:
1392   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1393   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1394 
1395   Developer Notes:
1396   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1397   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1398   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1399   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1400 
1401   Level: advanced
1402 
1403 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1404 @*/
PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),kaij));
  PetscCall(MatSetType(*kaij,MATKAIJ));   /* install the KAIJ implementation before configuring it */
  PetscCall(MatKAIJSetAIJ(*kaij,A));      /* blockwise action; increases the reference count of A (see Notes above) */
  PetscCall(MatKAIJSetS(*kaij,p,q,S));    /* S, T: dense p x q blocks, column-major; either may be NULL */
  PetscCall(MatKAIJSetT(*kaij,p,q,T));
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(0);
}
1416 
1417 /*MC
1418   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1419     [I \otimes S + A \otimes T],
1420   where
1421     S is a dense (p \times q) matrix,
1422     T is a dense (p \times q) matrix,
1423     A is an AIJ  (n \times n) matrix,
1424     and I is the identity matrix.
1425   The resulting matrix is (np \times nq).
1426 
1427   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1428 
1429   Notes:
1430   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1431   where x and b are column vectors containing the row-major representations of X and B.
1432 
1433   Level: advanced
1434 
1435 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1436 M*/
1437 
/*
  MatCreate_KAIJ - Constructor registered for MATKAIJ.

  Chooses the sequential (MATSEQKAIJ) or parallel (MATMPIKAIJ) flavor based on
  the size of the matrix communicator and installs the corresponding entries in
  the operations table.  Operations not set below remain zeroed, i.e. unsupported.
*/
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ    *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* NOTE(review): a Mat_MPIKAIJ is allocated even in the sequential case, where the
     data is accessed as Mat_SeqKAIJ -- presumably the structs share a common leading
     layout; confirm against kaij.h */
  PetscCall(PetscNewLog(A,&b));
  A->data  = (void*)b;

  /* start from an empty function table; only the operations assigned below exist */
  PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));

  b->w    = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;  /* SOR is only available sequentially */
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ));
  }
  /* operations shared by both flavors */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(0);
}
1477