xref: /petsc/src/mat/impls/kaij/kaij.c (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscBool      ismpikaij,isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(0);
64 }
65 
66 /*@C
67    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
68 
69    Not Collective; the entire S is stored and returned independently on all processes.
70 
71    Input Parameter:
72 .  A - the KAIJ matrix
73 
74    Output Parameters:
75 +  m - the number of rows in S
76 .  n - the number of columns in S
77 -  S - the S matrix, in form of a scalar array in column-major format
78 
79    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
80 
81    Level: advanced
82 
83 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
84 @*/
85 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
86 {
87   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
88   PetscFunctionBegin;
89   if (m) *m = b->p;
90   if (n) *n = b->q;
91   if (S) *S = b->S;
92   PetscFunctionReturn(0);
93 }
94 
95 /*@C
96    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
97 
98    Not Collective; the entire S is stored and returned independently on all processes.
99 
100    Input Parameter:
101 .  A - the KAIJ matrix
102 
103    Output Parameters:
104 +  m - the number of rows in S
105 .  n - the number of columns in S
106 -  S - the S matrix, in form of a scalar array in column-major format
107 
108    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
109 
110    Level: advanced
111 
112 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
113 @*/
114 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
115 {
116   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
117   PetscFunctionBegin;
118   if (m) *m = b->p;
119   if (n) *n = b->q;
120   if (S) *S = b->S;
121   PetscFunctionReturn(0);
122 }
123 
124 /*@C
125   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
126 
127   Not collective
128 
129   Input Parameter:
130 . A - the KAIJ matrix
131 
132   Output Parameter:
133 . S - location of pointer to array obtained with MatKAIJGetS()
134 
135   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
136   If NULL is passed, it will not attempt to zero the array pointer.
137 
138   Level: advanced
139 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
140 @*/
141 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
142 {
143   PetscFunctionBegin;
144   if (S) *S = NULL;
145   PetscCall(PetscObjectStateIncrease((PetscObject)A));
146   PetscFunctionReturn(0);
147 }
148 
149 /*@C
150   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
151 
152   Not collective
153 
154   Input Parameter:
155 . A - the KAIJ matrix
156 
157   Output Parameter:
158 . S - location of pointer to array obtained with MatKAIJGetSRead()
159 
160   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
161   If NULL is passed, it will not attempt to zero the array pointer.
162 
163   Level: advanced
164 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
165 @*/
166 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
167 {
168   PetscFunctionBegin;
169   if (S) *S = NULL;
170   PetscFunctionReturn(0);
171 }
172 
173 /*@C
174    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
175 
176    Not Collective; the entire T is stored and returned independently on all processes
177 
178    Input Parameter:
179 .  A - the KAIJ matrix
180 
181    Output Parameters:
182 +  m - the number of rows in T
183 .  n - the number of columns in T
184 -  T - the T matrix, in form of a scalar array in column-major format
185 
186    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
187 
188    Level: advanced
189 
190 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
191 @*/
192 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
193 {
194   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
195   PetscFunctionBegin;
196   if (m) *m = b->p;
197   if (n) *n = b->q;
198   if (T) *T = b->T;
199   PetscFunctionReturn(0);
200 }
201 
202 /*@C
203    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
204 
205    Not Collective; the entire T is stored and returned independently on all processes
206 
207    Input Parameter:
208 .  A - the KAIJ matrix
209 
210    Output Parameters:
211 +  m - the number of rows in T
212 .  n - the number of columns in T
213 -  T - the T matrix, in form of a scalar array in column-major format
214 
215    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
216 
217    Level: advanced
218 
219 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
220 @*/
221 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
222 {
223   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
224   PetscFunctionBegin;
225   if (m) *m = b->p;
226   if (n) *n = b->q;
227   if (T) *T = b->T;
228   PetscFunctionReturn(0);
229 }
230 
231 /*@C
232   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
233 
234   Not collective
235 
236   Input Parameter:
237 . A - the KAIJ matrix
238 
239   Output Parameter:
240 . T - location of pointer to array obtained with MatKAIJGetT()
241 
242   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
243   If NULL is passed, it will not attempt to zero the array pointer.
244 
245   Level: advanced
246 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
247 @*/
248 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
249 {
250   PetscFunctionBegin;
251   if (T) *T = NULL;
252   PetscCall(PetscObjectStateIncrease((PetscObject)A));
253   PetscFunctionReturn(0);
254 }
255 
256 /*@C
257   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
258 
259   Not collective
260 
261   Input Parameter:
262 . A - the KAIJ matrix
263 
264   Output Parameter:
265 . T - location of pointer to array obtained with MatKAIJGetTRead()
266 
267   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
268   If NULL is passed, it will not attempt to zero the array pointer.
269 
270   Level: advanced
271 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
272 @*/
273 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
274 {
275   PetscFunctionBegin;
276   if (T) *T = NULL;
277   PetscFunctionReturn(0);
278 }
279 
280 /*@
281    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
282 
283    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
284 
285    Input Parameters:
286 +  A - the KAIJ matrix
287 -  B - the AIJ matrix
288 
289    Notes:
290    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
291    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
292 
293    Level: advanced
294 
295 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
296 @*/
297 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
298 {
299   PetscMPIInt    size;
300   PetscBool      flg;
301 
302   PetscFunctionBegin;
303   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
304   if (size == 1) {
305     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg));
306     PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name);
307     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
308     a->AIJ = B;
309   } else {
310     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
311     a->A = B;
312   }
313   PetscCall(PetscObjectReference((PetscObject)B));
314   PetscFunctionReturn(0);
315 }
316 
317 /*@C
318    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
319 
320    Logically Collective; the entire S is stored independently on all processes.
321 
322    Input Parameters:
323 +  A - the KAIJ matrix
324 .  p - the number of rows in S
325 .  q - the number of columns in S
326 -  S - the S matrix, in form of a scalar array in column-major format
327 
328    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
329    The S matrix is copied, so the user can destroy this array.
330 
331    Level: Advanced
332 
333 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
334 @*/
335 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
336 {
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   PetscCall(PetscFree(a->S));
341   if (S) {
342     PetscCall(PetscMalloc1(p*q,&a->S));
343     PetscCall(PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar)));
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
353 
354    Logically Collective.
355 
356    Input Parameter:
357 .  A - the KAIJ matrix
358 
359   Output Parameter:
360 .  identity - the Boolean value
361 
362    Level: Advanced
363 
364 .seealso: MatKAIJGetS(), MatKAIJGetT()
365 @*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
  PetscInt    i,j;

  PetscFunctionBegin;
  /* Non-square S/T can never be scaled identities */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(0);
  } else *identity = PETSC_TRUE;
  /* If T is the (unstored) identity and there is no S, the answer is trivially true
     and the scan below is skipped.  Otherwise examine every entry of S and T; the
     loops abort as soon as *identity flips to false (it appears in the conditions). */
  if (!a->isTI || a->S) {
    for (i=0; i<a->p && *identity; i++) {
      for (j=0; j<a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries must vanish (up to PETSC_SMALL tolerance) */
          if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* all diagonal entries must equal the (0,0) entry, i.e. a single scaling */
          if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
391 
392 /*@C
393    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
394 
395    Logically Collective; the entire T is stored independently on all processes.
396 
397    Input Parameters:
398 +  A - the KAIJ matrix
399 .  p - the number of rows in T
400 .  q - the number of columns in T
401 -  T - the T matrix, in form of a scalar array in column-major format
402 
403    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
404    The T matrix is copied, so the user can destroy this array.
405 
406    Level: Advanced
407 
408 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
409 @*/
410 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
411 {
412   PetscInt       i,j;
413   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
414   PetscBool      isTI = PETSC_FALSE;
415 
416   PetscFunctionBegin;
417   /* check if T is an identity matrix */
418   if (T && (p == q)) {
419     isTI = PETSC_TRUE;
420     for (i=0; i<p; i++) {
421       for (j=0; j<q; j++) {
422         if (i == j) {
423           /* diagonal term must be 1 */
424           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
425         } else {
426           /* off-diagonal term must be 0 */
427           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
428         }
429       }
430     }
431   }
432   a->isTI = isTI;
433 
434   PetscCall(PetscFree(a->T));
435   if (T && (!isTI)) {
436     PetscCall(PetscMalloc1(p*q,&a->T));
437     PetscCall(PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar)));
438   } else a->T = NULL;
439 
440   a->p = p;
441   a->q = q;
442   PetscFunctionReturn(0);
443 }
444 
445 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
446 {
447   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
448 
449   PetscFunctionBegin;
450   PetscCall(MatDestroy(&b->AIJ));
451   PetscCall(PetscFree(b->S));
452   PetscCall(PetscFree(b->T));
453   PetscCall(PetscFree(b->ibdiag));
454   PetscCall(PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr));
455   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL));
456   PetscCall(PetscFree(A->data));
457   PetscFunctionReturn(0);
458 }
459 
460 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
461 {
462   Mat_MPIKAIJ      *a;
463   Mat_MPIAIJ       *mpiaij;
464   PetscScalar      *T;
465   PetscInt         i,j;
466   PetscObjectState state;
467 
468   PetscFunctionBegin;
469   a = (Mat_MPIKAIJ*)A->data;
470   mpiaij = (Mat_MPIAIJ*)a->A->data;
471 
472   PetscCall(PetscObjectStateGet((PetscObject)a->A,&state));
473   if (state == a->state) {
474     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
475     PetscFunctionReturn(0);
476   } else {
477     PetscCall(MatDestroy(&a->AIJ));
478     PetscCall(MatDestroy(&a->OAIJ));
479     if (a->isTI) {
480       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
481        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
482        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
483        * to pass in. */
484       PetscCall(PetscMalloc1(a->p*a->q,&T));
485       for (i=0; i<a->p; i++) {
486         for (j=0; j<a->q; j++) {
487           if (i==j) T[i+j*a->p] = 1.0;
488           else      T[i+j*a->p] = 0.0;
489         }
490       }
491     } else T = a->T;
492     PetscCall(MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ));
493     PetscCall(MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ));
494     if (a->isTI) {
495       PetscCall(PetscFree(T));
496     }
497     a->state = state;
498   }
499 
500   PetscFunctionReturn(0);
501 }
502 
503 PetscErrorCode MatSetUp_KAIJ(Mat A)
504 {
505   PetscInt       n;
506   PetscMPIInt    size;
507   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
508 
509   PetscFunctionBegin;
510   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
511   if (size == 1) {
512     PetscCall(MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N));
513     PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
514     PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
515     PetscCall(PetscLayoutSetUp(A->rmap));
516     PetscCall(PetscLayoutSetUp(A->cmap));
517   } else {
518     Mat_MPIKAIJ *a;
519     Mat_MPIAIJ  *mpiaij;
520     IS          from,to;
521     Vec         gvec;
522 
523     a = (Mat_MPIKAIJ*)A->data;
524     mpiaij = (Mat_MPIAIJ*)a->A->data;
525     PetscCall(MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N));
526     PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
527     PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
528     PetscCall(PetscLayoutSetUp(A->rmap));
529     PetscCall(PetscLayoutSetUp(A->cmap));
530 
531     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
532 
533     PetscCall(VecGetSize(mpiaij->lvec,&n));
534     PetscCall(VecCreate(PETSC_COMM_SELF,&a->w));
535     PetscCall(VecSetSizes(a->w,n*a->q,n*a->q));
536     PetscCall(VecSetBlockSize(a->w,a->q));
537     PetscCall(VecSetType(a->w,VECSEQ));
538 
539     /* create two temporary Index sets for build scatter gather */
540     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
541     PetscCall(ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to));
542 
543     /* create temporary global vector to generate scatter context */
544     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec));
545 
546     /* generate the scatter context */
547     PetscCall(VecScatterCreate(gvec,from,a->w,to,&a->ctx));
548 
549     PetscCall(ISDestroy(&from));
550     PetscCall(ISDestroy(&to));
551     PetscCall(VecDestroy(&gvec));
552   }
553 
554   A->assembled = PETSC_TRUE;
555   PetscFunctionReturn(0);
556 }
557 
/* Viewer for KAIJ matrices: INFO/IMPL ASCII formats print S, T, and the embedded
   AIJ matrix directly; every other format converts to AIJ and views that. */
PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
{
  PetscViewerFormat format;
  /* NOTE(review): A->data is read through Mat_SeqKAIJ even when A is MPIKAIJ; this
     assumes the p/q/S/T/isTI fields share the same leading layout in both structs —
     confirm against kaij.h */
  Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      /* IMPL format dumps the raw column-major entries of S */
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of S are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      /* identity T is not stored explicitly (a->T is NULL in that case) */
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of T are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      PetscCall(MatView(b->A,viewer));
    } else {
      PetscCall(MatView(a->AIJ,viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B));
    PetscCall(MatView(B,viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(0);
}
621 
622 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
623 {
624   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
625 
626   PetscFunctionBegin;
627   PetscCall(MatDestroy(&b->AIJ));
628   PetscCall(MatDestroy(&b->OAIJ));
629   PetscCall(MatDestroy(&b->A));
630   PetscCall(VecScatterDestroy(&b->ctx));
631   PetscCall(VecDestroy(&b->w));
632   PetscCall(PetscFree(b->S));
633   PetscCall(PetscFree(b->T));
634   PetscCall(PetscFree(b->ibdiag));
635   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL));
636   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL));
637   PetscCall(PetscFree(A->data));
638   PetscFunctionReturn(0);
639 }
640 
641 /* --------------------------------------------------------------------------------------*/
642 
643 /* zz = yy + Axx */
/* zz = yy + A xx for the sequential KAIJ matrix [I \otimes S + A \otimes T].
   Passing yy == NULL computes zz = A xx (used by MatMult_SeqKAIJ). */
PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x,*v,*bx;
  PetscScalar       *y,*sums;
  const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
  PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

  PetscFunctionBegin;
  /* Initialize zz with the addend (or zero), then accumulate into it */
  if (!yy) {
    PetscCall(VecSet(zz,0.0));
  } else {
    PetscCall(VecCopy(yy,zz));
  }
  /* No S, no T, and T is not the implicit identity: the operator is zero */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);

  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(VecGetArray(zz,&y));
  /* CSR arrays of the embedded AIJ matrix */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  if (b->isTI) {
    /* T == I: each AIJ entry scales a q-vector of x directly into a p-vector of y */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;       /* output block for AIJ row i */
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
        }
      }
    }
    PetscCall(PetscLogFlops(3.0*(a->nz)*p));
  } else if (t) {
    /* General T: apply the dense p-by-q block T to the x sub-vector of each AIJ entry */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          for (l=0; l<q; l++) {
            sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
          }
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz));
  }
  if (s) {
    /* Shift part (I \otimes S): block-diagonal application of S; the i < cmap->n
       guard restricts it to rows that have a matching column block (rectangular A) */
    for (i=0; i<m; i++) {
      sums = y + p*i;
      bx   = x + q*i;
      if (i < b->AIJ->cmap->n) {
        for (j=0; j<q; j++) {
          for (k=0; k<p; k++) {
            sums[k] += s[k+j*p]*bx[j];
          }
        }
      }
    }
    PetscCall(PetscLogFlops(2.0*m*p*q));
  }

  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecRestoreArray(zz,&y));
  PetscFunctionReturn(0);
}
718 
719 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
720 {
721   PetscFunctionBegin;
722   PetscCall(MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy));
723   PetscFunctionReturn(0);
724 }
725 
726 #include <petsc/private/kernels/blockinvert.h>
727 
/* Compute (and cache) the inverses of the p-by-p diagonal blocks S + a_ii * T,
   where a_ii is the diagonal entry of the embedded AIJ matrix in row i.
   If values is non-NULL it returns a pointer to the cached array. */
PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
{
  Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *S  = b->S;
  const PetscScalar *T  = b->T;
  const PetscScalar *v  = a->a;
  const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt          i,j,*v_pivots,dof,dof2;
  PetscScalar       *diag,aval,*v_work;

  PetscFunctionBegin;
  /* Only square dense blocks can be inverted, and at least one of S/T must exist */
  PetscCheckFalse(p != q,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheckFalse((!S) && (!T) && (!b->isTI),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof*dof;

  /* Return the cached result if it is still valid */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(0);
  }
  /* Lazily allocate the cache: one dof2 block per AIJ row */
  if (!b->ibdiag) {
    PetscCall(PetscMalloc1(dof2*m,&b->ibdiag));
    PetscCall(PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar)));
  }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  /* Scratch for the in-place LU inversion kernel (work row + pivot indices) */
  PetscCall(PetscMalloc2(dof,&v_work,dof,&v_pivots));
  for (i=0; i<m; i++) {
    /* Start from S (or zero), then add a_ii * T (or a_ii * I when isTI) */
    if (S) {
      PetscCall(PetscMemcpy(diag,S,dof2*sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag,dof2*sizeof(PetscScalar)));
    }
    if (b->isTI) {
      /* scan row i of the AIJ matrix for its diagonal entry; 0 if absent */
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof; j++) diag[j+dof*j] += aval;
    } else if (T) {
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof2; j++) diag[j] += aval*T[j];
    }
    /* Invert the assembled block in place */
    PetscCall(PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work,v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(0);
}
781 
782 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
783 {
784   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
785 
786   PetscFunctionBegin;
787   *B = kaij->AIJ;
788   PetscFunctionReturn(0);
789 }
790 
/* Convert a (Seq/MPI)KAIJ matrix to plain AIJ.  For a fresh conversion the exact
   per-row nonzero counts are precomputed so the target is preallocated; the actual
   entry transfer is delegated to MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
  Mat            AIJ,OAIJ,B;
  PetscInt       *d_nnz,*o_nnz = NULL,nz,i,j,m,d;
  const PetscInt p = a->p,q = a->q;
  PetscBool      ismpikaij,missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    /* Pick out the local (and, in parallel, off-process) sequential AIJ parts */
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
    } else {
      AIJ = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,MATAIJ));
    PetscCall(MatGetSize(AIJ,&m,NULL));
    PetscCall(MatMissingDiagonal(AIJ,&missing,&d)); /* assumption that all successive rows will have a missing diagonal */
    /* d is the first AIJ row whose diagonal is absent; rows i >= d need q extra
       slots for the S contribution.  If nothing is missing, or there is no S,
       no extra slots are needed anywhere (d = m disables the i >= d term). */
    if (!missing || !a->S) d = m;
    /* Each AIJ row i expands to p rows of the converted matrix, each with
       nz*q entries from A\otimes T plus possibly q entries from I\otimes S */
    PetscCall(PetscMalloc1(m*p,&d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
      for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
    }
    if (OAIJ) {
      /* Off-diagonal part has no S contribution: plain nz*q per expanded row */
      PetscCall(PetscMalloc1(m*p,&o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
        for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B,0,d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  /* The generic converter fills B row by row */
  PetscCall(MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else *newmat = B;
  PetscFunctionReturn(0);
}
842 
843 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
844 {
845   PetscErrorCode    ierr;
846   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
847   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
848   const PetscScalar *aa = a->a, *T = kaij->T, *v;
849   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
850   const PetscScalar *b, *xb, *idiag;
851   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
852   PetscInt          i, j, k, i2, bs, bs2, nz;
853 
854   PetscFunctionBegin;
855   its = its*lits;
856   PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
857   PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
858   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
859   PetscCheckFalse((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
860   PetscCheckFalse(p != q,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
861   else        {bs = p; bs2 = bs*bs; }
862 
863   if (!m) PetscFunctionReturn(0);
864 
865   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A,NULL));
866   idiag = kaij->ibdiag;
867   diag  = a->diag;
868 
869   if (!kaij->sor.setup) {
870     PetscCall(PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr));
871     kaij->sor.setup = PETSC_TRUE;
872   }
873   y     = kaij->sor.y;
874   w     = kaij->sor.w;
875   work  = kaij->sor.work;
876   t     = kaij->sor.t;
877   arr   = kaij->sor.arr;
878 
879   ierr = VecGetArray(xx,&x);    PetscCall(ierr);
880   PetscCall(VecGetArrayRead(bb,&b));
881 
882   if (flag & SOR_ZERO_INITIAL_GUESS) {
883     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
884       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
885       PetscCall(PetscMemcpy(t,b,bs*sizeof(PetscScalar)));
886       i2     =  bs;
887       idiag  += bs2;
888       for (i=1; i<m; i++) {
889         v  = aa + ai[i];
890         vi = aj + ai[i];
891         nz = diag[i] - ai[i];
892 
893         if (T) {                /* b - T (Arow * x) */
894           PetscCall(PetscMemzero(w,bs*sizeof(PetscScalar)));
895           for (j=0; j<nz; j++) {
896             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
897           }
898           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
899           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
900         } else if (kaij->isTI) {
901           PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
902           for (j=0; j<nz; j++) {
903             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
904           }
905         } else {
906           PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
907         }
908 
909         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
910         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
911 
912         idiag += bs2;
913         i2    += bs;
914       }
915       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
916       PetscCall(PetscLogFlops(1.0*bs2*a->nz));
917       xb = t;
918     } else xb = b;
919     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
920       idiag = kaij->ibdiag+bs2*(m-1);
921       i2    = bs * (m-1);
922       PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
923       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
924       i2    -= bs;
925       idiag -= bs2;
926       for (i=m-2; i>=0; i--) {
927         v  = aa + diag[i] + 1 ;
928         vi = aj + diag[i] + 1;
929         nz = ai[i+1] - diag[i] - 1;
930 
931         if (T) {                /* FIXME: This branch untested */
932           PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
933           /* copy all rows of x that are needed into contiguous space */
934           workt = work;
935           for (j=0; j<nz; j++) {
936             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
937             workt += bs;
938           }
939           arrt = arr;
940           for (j=0; j<nz; j++) {
941             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
942             for (k=0; k<bs2; k++) arrt[k] *= v[j];
943             arrt += bs2;
944           }
945           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
946         } else if (kaij->isTI) {
947           PetscCall(PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar)));
948           for (j=0; j<nz; j++) {
949             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
950           }
951         }
952 
953         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
954         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
955 
956         idiag -= bs2;
957         i2    -= bs;
958       }
959       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
960     }
961     its--;
962   }
963   while (its--) {               /* FIXME: This branch not updated */
964     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
965       i2     =  0;
966       idiag  = kaij->ibdiag;
967       for (i=0; i<m; i++) {
968         PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
969 
970         v  = aa + ai[i];
971         vi = aj + ai[i];
972         nz = diag[i] - ai[i];
973         workt = work;
974         for (j=0; j<nz; j++) {
975           PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
976           workt += bs;
977         }
978         arrt = arr;
979         if (T) {
980           for (j=0; j<nz; j++) {
981             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
982             for (k=0; k<bs2; k++) arrt[k] *= v[j];
983             arrt += bs2;
984           }
985           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
986         } else if (kaij->isTI) {
987           for (j=0; j<nz; j++) {
988             PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
989             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
990             arrt += bs2;
991           }
992           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
993         }
994         PetscCall(PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar)));
995 
996         v  = aa + diag[i] + 1;
997         vi = aj + diag[i] + 1;
998         nz = ai[i+1] - diag[i] - 1;
999         workt = work;
1000         for (j=0; j<nz; j++) {
1001           PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1002           workt += bs;
1003         }
1004         arrt = arr;
1005         if (T) {
1006           for (j=0; j<nz; j++) {
1007             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1008             for (k=0; k<bs2; k++) arrt[k] *= v[j];
1009             arrt += bs2;
1010           }
1011           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1012         } else if (kaij->isTI) {
1013           for (j=0; j<nz; j++) {
1014             PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1015             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1016             arrt += bs2;
1017           }
1018           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1019         }
1020 
1021         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1022         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1023 
1024         idiag += bs2;
1025         i2    += bs;
1026       }
1027       xb = t;
1028     }
1029     else xb = b;
1030     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1031       idiag = kaij->ibdiag+bs2*(m-1);
1032       i2    = bs * (m-1);
1033       if (xb == b) {
1034         for (i=m-1; i>=0; i--) {
1035           PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
1036 
1037           v  = aa + ai[i];
1038           vi = aj + ai[i];
1039           nz = diag[i] - ai[i];
1040           workt = work;
1041           for (j=0; j<nz; j++) {
1042             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1043             workt += bs;
1044           }
1045           arrt = arr;
1046           if (T) {
1047             for (j=0; j<nz; j++) {
1048               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1049               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1050               arrt += bs2;
1051             }
1052             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1053           } else if (kaij->isTI) {
1054             for (j=0; j<nz; j++) {
1055               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1056               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1057               arrt += bs2;
1058             }
1059             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1060           }
1061 
1062           v  = aa + diag[i] + 1;
1063           vi = aj + diag[i] + 1;
1064           nz = ai[i+1] - diag[i] - 1;
1065           workt = work;
1066           for (j=0; j<nz; j++) {
1067             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1068             workt += bs;
1069           }
1070           arrt = arr;
1071           if (T) {
1072             for (j=0; j<nz; j++) {
1073               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1074               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1075               arrt += bs2;
1076             }
1077             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1078           } else if (kaij->isTI) {
1079             for (j=0; j<nz; j++) {
1080               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1081               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1082               arrt += bs2;
1083             }
1084             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1085           }
1086 
1087           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1088           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1089         }
1090       } else {
1091         for (i=m-1; i>=0; i--) {
1092           PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
1093           v  = aa + diag[i] + 1;
1094           vi = aj + diag[i] + 1;
1095           nz = ai[i+1] - diag[i] - 1;
1096           workt = work;
1097           for (j=0; j<nz; j++) {
1098             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1099             workt += bs;
1100           }
1101           arrt = arr;
1102           if (T) {
1103             for (j=0; j<nz; j++) {
1104               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1105               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1106               arrt += bs2;
1107             }
1108             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1109           } else if (kaij->isTI) {
1110             for (j=0; j<nz; j++) {
1111               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1112               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1113               arrt += bs2;
1114             }
1115             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1116           }
1117           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1118           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1119         }
1120       }
1121       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
1122     }
1123   }
1124 
1125   ierr = VecRestoreArray(xx,&x);    PetscCall(ierr);
1126   PetscCall(VecRestoreArrayRead(bb,&b));
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 /*===================================================================================*/
1131 
1132 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1135 
1136   PetscFunctionBegin;
1137   if (!yy) {
1138     PetscCall(VecSet(zz,0.0));
1139   } else {
1140     PetscCall(VecCopy(yy,zz));
1141   }
1142   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1143   /* start the scatter */
1144   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
1145   PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz));
1146   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
1147   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1152 {
1153   PetscFunctionBegin;
1154   PetscCall(MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy));
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1159 {
1160   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1161 
1162   PetscFunctionBegin;
1163   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1164   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values));
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 /* ----------------------------------------------------------------*/
1169 
1170 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1171 {
1172   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1173   PetscErrorCode  diag = PETSC_FALSE;
1174   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1175   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1176 
1177   PetscFunctionBegin;
1178   PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1179   b->getrowactive = PETSC_TRUE;
1180   PetscCheckFalse(row < 0 || row >= A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row);
1181 
1182   if ((!S) && (!T) && (!b->isTI)) {
1183     if (ncols)    *ncols  = 0;
1184     if (cols)     *cols   = NULL;
1185     if (values)   *values = NULL;
1186     PetscFunctionReturn(0);
1187   }
1188 
1189   if (T || b->isTI) {
1190     PetscCall(MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij));
1191     c     = nzaij;
1192     for (i=0; i<nzaij; i++) {
1193       /* check if this row contains a diagonal entry */
1194       if (colsaij[i] == r) {
1195         diag = PETSC_TRUE;
1196         c = i;
1197       }
1198     }
1199   } else nzaij = c = 0;
1200 
1201   /* calculate size of row */
1202   nz = 0;
1203   if (S)            nz += q;
1204   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1205 
1206   if (cols || values) {
1207     PetscCall(PetscMalloc2(nz,&idx,nz,&v));
1208     for (i=0; i<q; i++) {
1209       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1210       v[i] = 0.0;
1211     }
1212     if (b->isTI) {
1213       for (i=0; i<nzaij; i++) {
1214         for (j=0; j<q; j++) {
1215           idx[i*q+j] = colsaij[i]*q+j;
1216           v[i*q+j]   = (j==s ? vaij[i] : 0);
1217         }
1218       }
1219     } else if (T) {
1220       for (i=0; i<nzaij; i++) {
1221         for (j=0; j<q; j++) {
1222           idx[i*q+j] = colsaij[i]*q+j;
1223           v[i*q+j]   = vaij[i]*T[s+j*p];
1224         }
1225       }
1226     }
1227     if (S) {
1228       for (j=0; j<q; j++) {
1229         idx[c*q+j] = r*q+j;
1230         v[c*q+j]  += S[s+j*p];
1231       }
1232     }
1233   }
1234 
1235   if (ncols)    *ncols  = nz;
1236   if (cols)     *cols   = idx;
1237   if (values)   *values = v;
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1242 {
1243   PetscFunctionBegin;
1244   if (nz) *nz = 0;
1245   PetscCall(PetscFree2(*idx,*v));
1246   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1247   PetscFunctionReturn(0);
1248 }
1249 
/*
   MatGetRow_MPIKAIJ - Extracts one locally-owned row of the parallel KAIJ matrix
   [I \otimes S + A \otimes T], assembling contributions from both the diagonal (AIJ)
   and off-diagonal (OAIJ) sequential parts.

   The global row must lie in [rstart, rend); it maps to local block row r = lrow/p and
   intra-block row s = lrow%p.  Column indices returned are global: diagonal-part columns
   are shifted by rstart/p block columns, off-diagonal columns are mapped through garray.
   The idx/v arrays are allocated here (one PetscMalloc2) and must be released via
   MatRestoreRow_MPIKAIJ().
*/
PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
{
  Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
  Mat             AIJ     = b->A;             /* the parallel AIJ matrix describing blockwise action */
  PetscBool       diag    = PETSC_FALSE;      /* does the diagonal part store entry (r,r)? */
  Mat             MatAIJ,MatOAIJ;             /* inner SeqAIJ matrices of the Seq KAIJ parts */
  const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
  PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
  PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
  MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
  PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* No S, no T, and T is not the identity: the matrix is zero and every row is empty */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols)    *ncols  = 0;
    if (cols)     *cols   = NULL;
    if (values)   *values = NULL;
    PetscFunctionReturn(0);
  }

  r = lrow/p; /* local block row */
  s = lrow%p; /* row within the p x q block */

  if (T || b->isTI) {
    PetscCall(MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray)); /* garray maps off-diagonal local cols to global block cols */
    PetscCall(MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals));

    c     = ncolsaij + ncolsoaij; /* by default S's block goes after all T contributions */
    for (i=0; i<ncolsaij; i++) {
      /* check if this row contains a diagonal entry; if so, S shares that block */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c = i;
      }
    }
  } else c = 0;

  /* calculate size of row: q columns from S plus q per AIJ entry, sharing the diagonal block */
  nz = 0;
  if (S)            nz += q;
  if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz,&idx,nz,&v));
    for (i=0; i<q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* T is the identity: each AIJ entry contributes only to the s-th column of its block */
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; /* shift diagonal-part column to global numbering */
          v[i*q+j]   = (j==s ? vals[i] : 0.0);
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; /* off-diagonal columns mapped via garray */
          v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
        }
      }
    } else if (T) {
      /* general T: scale row s of T (column-major, p x q) by the AIJ value */
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
          v[i*q+j]   = vals[i]*T[s+j*p];
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
          v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
        }
      }
    }
    if (S) {
      /* add row s of S into block c (the diagonal block, or a fresh trailing block) */
      for (j=0; j<q; j++) {
        idx[c*q+j] = (r+rstart/p)*q+j;
        v[c*q+j]  += S[s+j*p];
      }
    }
  }

  if (ncols)  *ncols  = nz;
  if (cols)   *cols   = idx;
  if (values) *values = v;
  PetscFunctionReturn(0);
}
1345 
1346 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1347 {
1348   PetscFunctionBegin;
1349   PetscCall(PetscFree2(*idx,*v));
1350   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1355 {
1356   Mat            A;
1357 
1358   PetscFunctionBegin;
1359   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
1360   PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
1361   PetscCall(MatDestroy(&A));
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 /* ---------------------------------------------------------------------------------- */
1366 /*@C
1367   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1368 
1369     [I \otimes S + A \otimes T]
1370 
1371   where
1372     S is a dense (p \times q) matrix
1373     T is a dense (p \times q) matrix
1374     A is an AIJ  (n \times n) matrix
1375     I is the identity matrix
1376   The resulting matrix is (np \times nq)
1377 
1378   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1379 
1380   Collective
1381 
1382   Input Parameters:
1383 + A - the AIJ matrix
1384 . p - number of rows in S and T
1385 . q - number of columns in S and T
1386   . S - the S matrix (can be NULL), stored as a PetscScalar array (column-major)
1387   - T - the T matrix (can be NULL), stored as a PetscScalar array (column-major)
1388 
1389   Output Parameter:
1390 . kaij - the new KAIJ matrix
1391 
1392   Notes:
1393   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1394   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1395 
1396   Developer Notes:
1397   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1398   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1399   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1400   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1401 
1402   Level: advanced
1403 
1404 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1405 @*/
PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
{
  PetscFunctionBegin;
  /* Create an empty matrix on A's communicator; MatSetType() resolves MATKAIJ to
     MATSEQKAIJ or MATMPIKAIJ in MatCreate_KAIJ() based on the communicator size. */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),kaij));
  PetscCall(MatSetType(*kaij,MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij,A));   /* increases the reference count on A (see the manual page above) */
  PetscCall(MatKAIJSetS(*kaij,p,q,S)); /* S may be NULL */
  PetscCall(MatKAIJSetT(*kaij,p,q,T)); /* T may be NULL */
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(0);
}
1417 
1418 /*MC
1419   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1420     [I \otimes S + A \otimes T],
1421   where
1422     S is a dense (p \times q) matrix,
1423     T is a dense (p \times q) matrix,
1424     A is an AIJ  (n \times n) matrix,
1425     and I is the identity matrix.
1426   The resulting matrix is (np \times nq).
1427 
1428   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1429 
1430   Notes:
1431   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1432   where x and b are column vectors containing the row-major representations of X and B.
1433 
1434   Level: advanced
1435 
1436 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1437 M*/
1438 
/* Type constructor for MATKAIJ: allocates the per-matrix data structure and wires the
   function table, choosing the sequential or parallel implementation by communicator size.
   (Mat_MPIKAIJ is allocated in both cases; the sequential code uses only its leading
   fields — presumably it is layout-compatible with Mat_SeqKAIJ; confirm in kaij.h.) */
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ    *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(A,&b));
  A->data  = (void*)b;

  /* Start from an empty operations table; only the ops set below are supported */
  PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));

  b->w    = NULL; /* work vector for the off-process scatter; created lazily */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* Sequential variant: also provides MatSOR, which has no parallel implementation */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ));
  } else {
    /* Parallel variant */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ));
  }
  /* Operations shared by both variants */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(0);
}
1478