xref: /petsc/src/mat/impls/kaij/kaij.c (revision 8fb5bd83c3955fefcf33a54e3bb66920a9fa884b)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: `MatCreateKAIJ()`
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscBool      ismpikaij,isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(0);
64 }
65 
66 /*@C
67    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
68 
69    Not Collective; the entire S is stored and returned independently on all processes.
70 
71    Input Parameter:
72 .  A - the KAIJ matrix
73 
74    Output Parameters:
75 +  m - the number of rows in S
76 .  n - the number of columns in S
77 -  S - the S matrix, in form of a scalar array in column-major format
78 
79    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
80 
81    Level: advanced
82 
83 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
84 @*/
85 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
86 {
87   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
88   PetscFunctionBegin;
89   if (m) *m = b->p;
90   if (n) *n = b->q;
91   if (S) *S = b->S;
92   PetscFunctionReturn(0);
93 }
94 
95 /*@C
96    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
97 
98    Not Collective; the entire S is stored and returned independently on all processes.
99 
100    Input Parameter:
101 .  A - the KAIJ matrix
102 
103    Output Parameters:
104 +  m - the number of rows in S
105 .  n - the number of columns in S
106 -  S - the S matrix, in form of a scalar array in column-major format
107 
108    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
109 
110    Level: advanced
111 
112 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
113 @*/
114 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
115 {
116   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
117   PetscFunctionBegin;
118   if (m) *m = b->p;
119   if (n) *n = b->q;
120   if (S) *S = b->S;
121   PetscFunctionReturn(0);
122 }
123 
124 /*@C
125   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
126 
127   Not collective
128 
129   Input Parameter:
130 . A - the KAIJ matrix
131 
132   Output Parameter:
133 . S - location of pointer to array obtained with MatKAIJGetS()
134 
135   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
136   If NULL is passed, it will not attempt to zero the array pointer.
137 
138   Level: advanced
139 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
140 @*/
141 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
142 {
143   PetscFunctionBegin;
144   if (S) *S = NULL;
145   PetscCall(PetscObjectStateIncrease((PetscObject)A));
146   PetscFunctionReturn(0);
147 }
148 
149 /*@C
150   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
151 
152   Not collective
153 
154   Input Parameter:
155 . A - the KAIJ matrix
156 
157   Output Parameter:
158 . S - location of pointer to array obtained with MatKAIJGetSRead()
159 
160   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
161   If NULL is passed, it will not attempt to zero the array pointer.
162 
163   Level: advanced
164 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
165 @*/
166 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
167 {
168   PetscFunctionBegin;
169   if (S) *S = NULL;
170   PetscFunctionReturn(0);
171 }
172 
173 /*@C
174    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
175 
176    Not Collective; the entire T is stored and returned independently on all processes
177 
178    Input Parameter:
179 .  A - the KAIJ matrix
180 
181    Output Parameters:
182 +  m - the number of rows in T
183 .  n - the number of columns in T
184 -  T - the T matrix, in form of a scalar array in column-major format
185 
186    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
187 
188    Level: advanced
189 
190 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
191 @*/
192 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
193 {
194   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
195   PetscFunctionBegin;
196   if (m) *m = b->p;
197   if (n) *n = b->q;
198   if (T) *T = b->T;
199   PetscFunctionReturn(0);
200 }
201 
202 /*@C
203    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
204 
205    Not Collective; the entire T is stored and returned independently on all processes
206 
207    Input Parameter:
208 .  A - the KAIJ matrix
209 
210    Output Parameters:
211 +  m - the number of rows in T
212 .  n - the number of columns in T
213 -  T - the T matrix, in form of a scalar array in column-major format
214 
215    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
216 
217    Level: advanced
218 
219 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
220 @*/
221 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
222 {
223   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
224   PetscFunctionBegin;
225   if (m) *m = b->p;
226   if (n) *n = b->q;
227   if (T) *T = b->T;
228   PetscFunctionReturn(0);
229 }
230 
231 /*@C
232   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
233 
234   Not collective
235 
236   Input Parameter:
237 . A - the KAIJ matrix
238 
239   Output Parameter:
240 . T - location of pointer to array obtained with MatKAIJGetT()
241 
242   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
243   If NULL is passed, it will not attempt to zero the array pointer.
244 
245   Level: advanced
246 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
247 @*/
248 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
249 {
250   PetscFunctionBegin;
251   if (T) *T = NULL;
252   PetscCall(PetscObjectStateIncrease((PetscObject)A));
253   PetscFunctionReturn(0);
254 }
255 
256 /*@C
257   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
258 
259   Not collective
260 
261   Input Parameter:
262 . A - the KAIJ matrix
263 
264   Output Parameter:
265 . T - location of pointer to array obtained with MatKAIJGetTRead()
266 
267   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
268   If NULL is passed, it will not attempt to zero the array pointer.
269 
270   Level: advanced
271 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
272 @*/
273 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
274 {
275   PetscFunctionBegin;
276   if (T) *T = NULL;
277   PetscFunctionReturn(0);
278 }
279 
280 /*@
281    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
282 
283    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
284 
285    Input Parameters:
286 +  A - the KAIJ matrix
287 -  B - the AIJ matrix
288 
289    Notes:
290    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
291    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
292 
293    Level: advanced
294 
295 .seealso: `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
296 @*/
297 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
298 {
299   PetscMPIInt    size;
300   PetscBool      flg;
301 
302   PetscFunctionBegin;
303   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
304   if (size == 1) {
305     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg));
306     PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name);
307     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
308     a->AIJ = B;
309   } else {
310     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
311     a->A = B;
312   }
313   PetscCall(PetscObjectReference((PetscObject)B));
314   PetscFunctionReturn(0);
315 }
316 
317 /*@C
318    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
319 
320    Logically Collective; the entire S is stored independently on all processes.
321 
322    Input Parameters:
323 +  A - the KAIJ matrix
324 .  p - the number of rows in S
325 .  q - the number of columns in S
326 -  S - the S matrix, in form of a scalar array in column-major format
327 
328    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
329    The S matrix is copied, so the user can destroy this array.
330 
331    Level: Advanced
332 
333 .seealso: `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
334 @*/
335 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
336 {
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   PetscCall(PetscFree(a->S));
341   if (S) {
342     PetscCall(PetscMalloc1(p*q,&a->S));
343     PetscCall(PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar)));
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
353 
354    Logically Collective.
355 
356    Input Parameter:
357 .  A - the KAIJ matrix
358 
359   Output Parameter:
360 .  identity - the Boolean value
361 
362    Level: Advanced
363 
364 .seealso: `MatKAIJGetS()`, `MatKAIJGetT()`
365 @*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
  PetscInt    i,j;

  PetscFunctionBegin;
  /* A non-square S/T can never be a scaled identity. */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(0);
  } else *identity = PETSC_TRUE;
  /* If T is the identity (isTI) and no S is stored there is nothing to scan;
     otherwise examine every entry of whichever of S and T are present. */
  if (!a->isTI || a->S) {
    for (i=0; i<a->p && *identity; i++) {
      for (j=0; j<a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries must vanish to within PETSC_SMALL */
          if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* every diagonal entry must equal the first one (common scale factor) */
          if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
391 
392 /*@C
393    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
394 
395    Logically Collective; the entire T is stored independently on all processes.
396 
397    Input Parameters:
398 +  A - the KAIJ matrix
399 .  p - the number of rows in T
400 .  q - the number of columns in T
401 -  T - the T matrix, in form of a scalar array in column-major format
402 
403    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
404    The T matrix is copied, so the user can destroy this array.
405 
406    Level: Advanced
407 
408 .seealso: `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
409 @*/
410 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
411 {
412   PetscInt       i,j;
413   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
414   PetscBool      isTI = PETSC_FALSE;
415 
416   PetscFunctionBegin;
417   /* check if T is an identity matrix */
418   if (T && (p == q)) {
419     isTI = PETSC_TRUE;
420     for (i=0; i<p; i++) {
421       for (j=0; j<q; j++) {
422         if (i == j) {
423           /* diagonal term must be 1 */
424           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
425         } else {
426           /* off-diagonal term must be 0 */
427           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
428         }
429       }
430     }
431   }
432   a->isTI = isTI;
433 
434   PetscCall(PetscFree(a->T));
435   if (T && (!isTI)) {
436     PetscCall(PetscMalloc1(p*q,&a->T));
437     PetscCall(PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar)));
438   } else a->T = NULL;
439 
440   a->p = p;
441   a->q = q;
442   PetscFunctionReturn(0);
443 }
444 
PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  /* Release the embedded AIJ matrix and the dense S/T arrays. */
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  /* Inverted block diagonal, built lazily by MatInvertBlockDiagonal_SeqKAIJ(). */
  PetscCall(PetscFree(b->ibdiag));
  /* SOR workspace, allocated on first use by MatSOR_SeqKAIJ(). */
  PetscCall(PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
459 
/* (Re)build the sequential KAIJ submatrices a->AIJ (diagonal part, with shift S)
   and a->OAIJ (off-diagonal part, no shift) of a parallel KAIJ matrix. The work is
   skipped when the cached object state shows the underlying AIJ matrix is unchanged. */
PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
{
  Mat_MPIKAIJ      *a;
  Mat_MPIAIJ       *mpiaij;
  PetscScalar      *T;
  PetscInt         i,j;
  PetscObjectState state;

  PetscFunctionBegin;
  a = (Mat_MPIKAIJ*)A->data;
  mpiaij = (Mat_MPIAIJ*)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A,&state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
    PetscFunctionReturn(0);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p*a->q,&T));
      for (i=0; i<a->p; i++) {
        for (j=0; j<a->q; j++) {
          if (i==j) T[i+j*a->p] = 1.0;
          else      T[i+j*a->p] = 0.0;
        }
      }
    } else T = a->T;
    /* mpiaij->A is the local diagonal block, mpiaij->B the off-diagonal block;
       only the diagonal block carries the shift S. */
    PetscCall(MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T));
    /* Remember the state of a->A so later calls can detect staleness. */
    a->state = state;
  }

  PetscFunctionReturn(0);
}
500 
/* Set up a KAIJ matrix: fix sizes/block sizes from the embedded AIJ matrix and,
   in parallel, build the sequential submatrices and the communication scatter. */
PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt       n;
  PetscMPIInt    size;
  /* NOTE(review): in the parallel branch below this same pointer is used for the
     block sizes; that relies on Mat_SeqKAIJ and Mat_MPIKAIJ sharing their leading
     members (p, q, ...) — confirm against kaij.h */
  Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* Sequential: the KAIJ matrix is (p*n) x (q*n) for an n x n AIJ matrix. */
    PetscCall(MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS          from,to;
    Vec         gvec;

    a = (Mat_MPIKAIJ*)A->data;
    mpiaij = (Mat_MPIAIJ*)a->A->data;
    PetscCall(MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    /* Build the sequential diagonal/off-diagonal KAIJ submatrices. */
    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Local work vector sized for the ghosted columns, q entries per AIJ column. */
    PetscCall(VecGetSize(mpiaij->lvec,&n));
    PetscCall(VecCreate(PETSC_COMM_SELF,&a->w));
    PetscCall(VecSetSizes(a->w,n*a->q,n*a->q));
    PetscCall(VecSetBlockSize(a->w,a->q));
    PetscCall(VecSetType(a->w,VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec,from,a->w,to,&a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
555 
/* View a KAIJ matrix. INFO/IMPL formats print S, T, and the embedded AIJ matrix
   directly; all other formats convert to AIJ and view that. */
PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
{
  PetscViewerFormat format;
  /* NOTE(review): this cast is applied even when A is MPIKAIJ; it appears to rely on
     Mat_SeqKAIJ and Mat_MPIKAIJ sharing their leading members (p, q, S, T, isTI) —
     confirm against kaij.h */
  Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      /* IMPL format dumps the raw column-major entries. */
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of S are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of T are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      PetscCall(MatView(b->A,viewer));
    } else {
      PetscCall(MatView(a->AIJ,viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B));
    PetscCall(MatView(B,viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(0);
}
619 
PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

  PetscFunctionBegin;
  /* Release the sequential diagonal/off-diagonal KAIJ submatrices and the parallel AIJ matrix. */
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  /* Scatter context and work vector created in MatSetUp_KAIJ(). */
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
638 
639 /* --------------------------------------------------------------------------------------*/
640 
641 /* zz = yy + Axx */
642 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
643 {
644   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
645   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
646   const PetscScalar *s = b->S, *t = b->T;
647   const PetscScalar *x,*v,*bx;
648   PetscScalar       *y,*sums;
649   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
650   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
651 
652   PetscFunctionBegin;
653   if (!yy) {
654     PetscCall(VecSet(zz,0.0));
655   } else {
656     PetscCall(VecCopy(yy,zz));
657   }
658   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
659 
660   PetscCall(VecGetArrayRead(xx,&x));
661   PetscCall(VecGetArray(zz,&y));
662   idx  = a->j;
663   v    = a->a;
664   ii   = a->i;
665 
666   if (b->isTI) {
667     for (i=0; i<m; i++) {
668       jrow = ii[i];
669       n    = ii[i+1] - jrow;
670       sums = y + p*i;
671       for (j=0; j<n; j++) {
672         for (k=0; k<p; k++) {
673           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
674         }
675       }
676     }
677     PetscCall(PetscLogFlops(3.0*(a->nz)*p));
678   } else if (t) {
679     for (i=0; i<m; i++) {
680       jrow = ii[i];
681       n    = ii[i+1] - jrow;
682       sums = y + p*i;
683       for (j=0; j<n; j++) {
684         for (k=0; k<p; k++) {
685           for (l=0; l<q; l++) {
686             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
687           }
688         }
689       }
690     }
691     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
692      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
693      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
694      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
695     PetscCall(PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz));
696   }
697   if (s) {
698     for (i=0; i<m; i++) {
699       sums = y + p*i;
700       bx   = x + q*i;
701       if (i < b->AIJ->cmap->n) {
702         for (j=0; j<q; j++) {
703           for (k=0; k<p; k++) {
704             sums[k] += s[k+j*p]*bx[j];
705           }
706         }
707       }
708     }
709     PetscCall(PetscLogFlops(2.0*m*p*q));
710   }
711 
712   PetscCall(VecRestoreArrayRead(xx,&x));
713   PetscCall(VecRestoreArray(zz,&y));
714   PetscFunctionReturn(0);
715 }
716 
717 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
718 {
719   PetscFunctionBegin;
720   PetscCall(MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy));
721   PetscFunctionReturn(0);
722 }
723 
724 #include <petsc/private/kernels/blockinvert.h>
725 
/* Compute (lazily, with caching) the inverses of the p x p diagonal blocks
   S + a_ii * T of a sequential KAIJ matrix; optionally return the cached array. */
PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
{
  Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *S  = b->S;
  const PetscScalar *T  = b->T;
  const PetscScalar *v  = a->a;
  const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt          i,j,*v_pivots,dof,dof2;
  PetscScalar       *diag,aval,*v_work;

  PetscFunctionBegin;
  PetscCheck(p == q,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof*dof;

  /* Return the cached inverse if it is still valid. */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(0);
  }
  /* Allocate the cache on first use: one dof x dof inverse per block row. */
  if (!b->ibdiag) {
    PetscCall(PetscMalloc1(dof2*m,&b->ibdiag));
    PetscCall(PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar)));
  }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof,&v_work,dof,&v_pivots));
  for (i=0; i<m; i++) {
    /* Start from S (or zero when S is absent). */
    if (S) {
      PetscCall(PetscMemcpy(diag,S,dof2*sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag,dof2*sizeof(PetscScalar)));
    }
    /* Add a_ii * T (a_ii taken as 0 when row i has no stored diagonal entry). */
    if (b->isTI) {
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof; j++) diag[j+dof*j] += aval;
    } else if (T) {
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof2; j++) diag[j] += aval*T[j];
    }
    /* Invert the block in place (no pivoting check requested: allowzeropivot = PETSC_FALSE). */
    PetscCall(PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work,v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(0);
}
779 
780 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
781 {
782   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
783 
784   PetscFunctionBegin;
785   *B = kaij->AIJ;
786   PetscFunctionReturn(0);
787 }
788 
/* Convert a (Seq or MPI) KAIJ matrix to an ordinary AIJ matrix: count the
   nonzeros implied by the p x q dense blocks to preallocate, then let
   MatConvert_Basic() fill in the values. */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  /* NOTE(review): cast used for p/q even when A is MPIKAIJ — presumably relies on
     Mat_SeqKAIJ and Mat_MPIKAIJ sharing leading members; confirm against kaij.h */
  Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
  Mat            AIJ,OAIJ,B;
  PetscInt       *d_nnz,*o_nnz = NULL,nz,i,j,m,d;
  const PetscInt p = a->p,q = a->q;
  PetscBool      ismpikaij,missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    /* Locate the underlying sequential AIJ matrices: diagonal (AIJ) and, in parallel, off-diagonal (OAIJ). */
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
    } else {
      AIJ = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,MATAIJ));
    PetscCall(MatGetSize(AIJ,&m,NULL));
    PetscCall(MatMissingDiagonal(AIJ,&missing,&d)); /* assumption that all successive rows will have a missing diagonal */
    if (!missing || !a->S) d = m;
    /* Each AIJ nonzero expands to a q-wide block; rows past d get an extra q
       for the diagonal S contribution that the AIJ matrix does not store. */
    PetscCall(PetscMalloc1(m*p,&d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
      for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
    }
    if (OAIJ) {
      /* Parallel: also count the off-diagonal blocks and preallocate an MPIAIJ. */
      PetscCall(PetscMalloc1(m*p,&o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
        for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B,0,d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  /* Fill B entry-by-entry via the generic conversion path. */
  PetscCall(MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else *newmat = B;
  PetscFunctionReturn(0);
}
840 
841 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
842 {
843   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
844   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
845   const PetscScalar *aa = a->a, *T = kaij->T, *v;
846   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
847   const PetscScalar *b, *xb, *idiag;
848   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
849   PetscInt          i, j, k, i2, bs, bs2, nz;
850 
851   PetscFunctionBegin;
852   its = its*lits;
853   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
854   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
855   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
856   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
857   PetscCheck(p == q,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
858   else        {bs = p; bs2 = bs*bs; }
859 
860   if (!m) PetscFunctionReturn(0);
861 
862   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A,NULL));
863   idiag = kaij->ibdiag;
864   diag  = a->diag;
865 
866   if (!kaij->sor.setup) {
867     PetscCall(PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr));
868     kaij->sor.setup = PETSC_TRUE;
869   }
870   y     = kaij->sor.y;
871   w     = kaij->sor.w;
872   work  = kaij->sor.work;
873   t     = kaij->sor.t;
874   arr   = kaij->sor.arr;
875 
876   PetscCall( VecGetArray(xx,&x));
877   PetscCall(VecGetArrayRead(bb,&b));
878 
879   if (flag & SOR_ZERO_INITIAL_GUESS) {
880     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
881       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
882       PetscCall(PetscMemcpy(t,b,bs*sizeof(PetscScalar)));
883       i2     =  bs;
884       idiag  += bs2;
885       for (i=1; i<m; i++) {
886         v  = aa + ai[i];
887         vi = aj + ai[i];
888         nz = diag[i] - ai[i];
889 
890         if (T) {                /* b - T (Arow * x) */
891           PetscCall(PetscMemzero(w,bs*sizeof(PetscScalar)));
892           for (j=0; j<nz; j++) {
893             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
894           }
895           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
896           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
897         } else if (kaij->isTI) {
898           PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
899           for (j=0; j<nz; j++) {
900             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
901           }
902         } else {
903           PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
904         }
905 
906         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
907         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
908 
909         idiag += bs2;
910         i2    += bs;
911       }
912       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
913       PetscCall(PetscLogFlops(1.0*bs2*a->nz));
914       xb = t;
915     } else xb = b;
916     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
917       idiag = kaij->ibdiag+bs2*(m-1);
918       i2    = bs * (m-1);
919       PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
920       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
921       i2    -= bs;
922       idiag -= bs2;
923       for (i=m-2; i>=0; i--) {
924         v  = aa + diag[i] + 1 ;
925         vi = aj + diag[i] + 1;
926         nz = ai[i+1] - diag[i] - 1;
927 
928         if (T) {                /* FIXME: This branch untested */
929           PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
930           /* copy all rows of x that are needed into contiguous space */
931           workt = work;
932           for (j=0; j<nz; j++) {
933             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
934             workt += bs;
935           }
936           arrt = arr;
937           for (j=0; j<nz; j++) {
938             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
939             for (k=0; k<bs2; k++) arrt[k] *= v[j];
940             arrt += bs2;
941           }
942           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
943         } else if (kaij->isTI) {
944           PetscCall(PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar)));
945           for (j=0; j<nz; j++) {
946             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
947           }
948         }
949 
950         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
951         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
952 
953         idiag -= bs2;
954         i2    -= bs;
955       }
956       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
957     }
958     its--;
959   }
960   while (its--) {               /* FIXME: This branch not updated */
961     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
962       i2     =  0;
963       idiag  = kaij->ibdiag;
964       for (i=0; i<m; i++) {
965         PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
966 
967         v  = aa + ai[i];
968         vi = aj + ai[i];
969         nz = diag[i] - ai[i];
970         workt = work;
971         for (j=0; j<nz; j++) {
972           PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
973           workt += bs;
974         }
975         arrt = arr;
976         if (T) {
977           for (j=0; j<nz; j++) {
978             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
979             for (k=0; k<bs2; k++) arrt[k] *= v[j];
980             arrt += bs2;
981           }
982           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
983         } else if (kaij->isTI) {
984           for (j=0; j<nz; j++) {
985             PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
986             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
987             arrt += bs2;
988           }
989           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
990         }
991         PetscCall(PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar)));
992 
993         v  = aa + diag[i] + 1;
994         vi = aj + diag[i] + 1;
995         nz = ai[i+1] - diag[i] - 1;
996         workt = work;
997         for (j=0; j<nz; j++) {
998           PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
999           workt += bs;
1000         }
1001         arrt = arr;
1002         if (T) {
1003           for (j=0; j<nz; j++) {
1004             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1005             for (k=0; k<bs2; k++) arrt[k] *= v[j];
1006             arrt += bs2;
1007           }
1008           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1009         } else if (kaij->isTI) {
1010           for (j=0; j<nz; j++) {
1011             PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1012             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1013             arrt += bs2;
1014           }
1015           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1016         }
1017 
1018         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1019         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1020 
1021         idiag += bs2;
1022         i2    += bs;
1023       }
1024       xb = t;
1025     }
1026     else xb = b;
1027     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1028       idiag = kaij->ibdiag+bs2*(m-1);
1029       i2    = bs * (m-1);
1030       if (xb == b) {
1031         for (i=m-1; i>=0; i--) {
1032           PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
1033 
1034           v  = aa + ai[i];
1035           vi = aj + ai[i];
1036           nz = diag[i] - ai[i];
1037           workt = work;
1038           for (j=0; j<nz; j++) {
1039             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1040             workt += bs;
1041           }
1042           arrt = arr;
1043           if (T) {
1044             for (j=0; j<nz; j++) {
1045               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1046               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1047               arrt += bs2;
1048             }
1049             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1050           } else if (kaij->isTI) {
1051             for (j=0; j<nz; j++) {
1052               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1053               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1054               arrt += bs2;
1055             }
1056             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1057           }
1058 
1059           v  = aa + diag[i] + 1;
1060           vi = aj + diag[i] + 1;
1061           nz = ai[i+1] - diag[i] - 1;
1062           workt = work;
1063           for (j=0; j<nz; j++) {
1064             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1065             workt += bs;
1066           }
1067           arrt = arr;
1068           if (T) {
1069             for (j=0; j<nz; j++) {
1070               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1071               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1072               arrt += bs2;
1073             }
1074             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1075           } else if (kaij->isTI) {
1076             for (j=0; j<nz; j++) {
1077               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1078               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1079               arrt += bs2;
1080             }
1081             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1082           }
1083 
1084           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1085           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1086         }
1087       } else {
1088         for (i=m-1; i>=0; i--) {
1089           PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
1090           v  = aa + diag[i] + 1;
1091           vi = aj + diag[i] + 1;
1092           nz = ai[i+1] - diag[i] - 1;
1093           workt = work;
1094           for (j=0; j<nz; j++) {
1095             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1096             workt += bs;
1097           }
1098           arrt = arr;
1099           if (T) {
1100             for (j=0; j<nz; j++) {
1101               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1102               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1103               arrt += bs2;
1104             }
1105             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1106           } else if (kaij->isTI) {
1107             for (j=0; j<nz; j++) {
1108               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1109               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1110               arrt += bs2;
1111             }
1112             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1113           }
1114           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1115           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1116         }
1117       }
1118       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
1119     }
1120   }
1121 
1122   PetscCall(VecRestoreArray(xx,&x));
1123   PetscCall(VecRestoreArrayRead(bb,&b));
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 /*===================================================================================*/
1128 
/*
  Computes zz = [I \otimes S + A \otimes T] xx + yy for the parallel (MPI) KAIJ matrix.
  If yy is NULL, computes zz = [I \otimes S + A \otimes T] xx (this is how MatMult_MPIKAIJ calls it).

  The off-process part of xx is gathered into b->w with a scatter that is started
  before, and completed after, the purely local product, so communication overlaps
  with local computation.  The statement order below is therefore significant.
*/
PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

  PetscFunctionBegin;
  if (!yy) {
    PetscCall(VecSet(zz,0.0));        /* no addend: zz starts at zero */
  } else {
    PetscCall(VecCopy(yy,zz));        /* zz starts as yy; products are accumulated into it */
  }
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
  /* local (diagonal-block) product while the scatter is in flight */
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz));
  PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
  /* off-diagonal product using the freshly received ghost values in b->w */
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
  PetscFunctionReturn(0);
}
1147 
1148 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1149 {
1150   PetscFunctionBegin;
1151   PetscCall(MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy));
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1156 {
1157   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1161   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values));
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 /* ----------------------------------------------------------------*/
1166 
1167 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1168 {
1169   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1170   PetscErrorCode  diag = PETSC_FALSE;
1171   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1172   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1173 
1174   PetscFunctionBegin;
1175   PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1176   b->getrowactive = PETSC_TRUE;
1177   PetscCheck(row >= 0 && row < A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row);
1178 
1179   if ((!S) && (!T) && (!b->isTI)) {
1180     if (ncols)    *ncols  = 0;
1181     if (cols)     *cols   = NULL;
1182     if (values)   *values = NULL;
1183     PetscFunctionReturn(0);
1184   }
1185 
1186   if (T || b->isTI) {
1187     PetscCall(MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij));
1188     c     = nzaij;
1189     for (i=0; i<nzaij; i++) {
1190       /* check if this row contains a diagonal entry */
1191       if (colsaij[i] == r) {
1192         diag = PETSC_TRUE;
1193         c = i;
1194       }
1195     }
1196   } else nzaij = c = 0;
1197 
1198   /* calculate size of row */
1199   nz = 0;
1200   if (S)            nz += q;
1201   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1202 
1203   if (cols || values) {
1204     PetscCall(PetscMalloc2(nz,&idx,nz,&v));
1205     for (i=0; i<q; i++) {
1206       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1207       v[i] = 0.0;
1208     }
1209     if (b->isTI) {
1210       for (i=0; i<nzaij; i++) {
1211         for (j=0; j<q; j++) {
1212           idx[i*q+j] = colsaij[i]*q+j;
1213           v[i*q+j]   = (j==s ? vaij[i] : 0);
1214         }
1215       }
1216     } else if (T) {
1217       for (i=0; i<nzaij; i++) {
1218         for (j=0; j<q; j++) {
1219           idx[i*q+j] = colsaij[i]*q+j;
1220           v[i*q+j]   = vaij[i]*T[s+j*p];
1221         }
1222       }
1223     }
1224     if (S) {
1225       for (j=0; j<q; j++) {
1226         idx[c*q+j] = r*q+j;
1227         v[c*q+j]  += S[s+j*p];
1228       }
1229     }
1230   }
1231 
1232   if (ncols)    *ncols  = nz;
1233   if (cols)     *cols   = idx;
1234   if (values)   *values = v;
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1239 {
1240   PetscFunctionBegin;
1241   if (nz) *nz = 0;
1242   PetscCall(PetscFree2(*idx,*v));
1243   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1244   PetscFunctionReturn(0);
1245 }
1246 
/*
  Returns one (global) row of the parallel KAIJ matrix [I \otimes S + A \otimes T]
  in expanded form.  The row must be owned by this process.  Local row lrow maps
  to row r = lrow/p of the AIJ matrix and row s = lrow%p of the dense blocks.
  On- and off-process columns are handled via the internal sequential AIJ parts;
  off-process column indices are translated to global indices through garray.

  idx/v are allocated here (PetscMalloc2) and released by MatRestoreRow_MPIKAIJ.
*/
PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
{
  Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
  Mat             AIJ     = b->A;
  PetscBool       diag    = PETSC_FALSE;
  Mat             MatAIJ,MatOAIJ;
  const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
  PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
  PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  /* The underlying sequential AIJ matrices of the diagonal and off-diagonal KAIJ parts */
  MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
  MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
  PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* With neither S nor T (and T not the identity) the matrix is zero: empty row. */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols)    *ncols  = 0;
    if (cols)     *cols   = NULL;
    if (values)   *values = NULL;
    PetscFunctionReturn(0);
  }

  r = lrow/p;   /* block row of A */
  s = lrow%p;   /* row within the dense p x q blocks */

  if (T || b->isTI) {
    PetscCall(MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray));
    PetscCall(MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals));

    c     = ncolsaij + ncolsoaij;
    for (i=0; i<ncolsaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c = i;             /* c = position where the S contribution is merged in */
      }
    }
  } else c = 0;

  /* calculate size of row */
  nz = 0;
  if (S)            nz += q;                                                             /* diagonal block contributes q entries */
  if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); /* S shares the diagonal slot when present */

  if (cols || values) {
    PetscCall(PetscMalloc2(nz,&idx,nz,&v));
    for (i=0; i<q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* T is the identity: each AIJ entry scatters onto the diagonal of its q x q block */
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;   /* local block column -> global expanded column */
          v[i*q+j]   = (j==s ? vals[i] : 0.0);
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;   /* off-process column via garray */
          v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
        }
      }
    } else if (T) {
      /* general T: row s of T, scaled by the AIJ entry (T stored column-major) */
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
          v[i*q+j]   = vals[i]*T[s+j*p];
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
          v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
        }
      }
    }
    if (S) {
      /* add row s of S into the diagonal-block slot (accumulate, T part may already be there) */
      for (j=0; j<q; j++) {
        idx[c*q+j] = (r+rstart/p)*q+j;
        v[c*q+j]  += S[s+j*p];
      }
    }
  }

  if (ncols)  *ncols  = nz;
  if (cols)   *cols   = idx;
  if (values) *values = v;
  PetscFunctionReturn(0);
}
1342 
1343 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1344 {
1345   PetscFunctionBegin;
1346   PetscCall(PetscFree2(*idx,*v));
1347   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1352 {
1353   Mat            A;
1354 
1355   PetscFunctionBegin;
1356   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
1357   PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
1358   PetscCall(MatDestroy(&A));
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 /* ---------------------------------------------------------------------------------- */
1363 /*@C
1364   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1365 
1366     [I \otimes S + A \otimes T]
1367 
1368   where
1369     S is a dense (p \times q) matrix
1370     T is a dense (p \times q) matrix
1371     A is an AIJ  (n \times n) matrix
1372     I is the identity matrix
1373   The resulting matrix is (np \times nq)
1374 
1375   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1376 
1377   Collective
1378 
1379   Input Parameters:
1380 + A - the AIJ matrix
1381 . p - number of rows in S and T
1382 . q - number of columns in S and T
1383 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1384 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1385 
1386   Output Parameter:
1387 . kaij - the new KAIJ matrix
1388 
1389   Notes:
1390   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1391   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1392 
1393   Developer Notes:
1394   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1395   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1396   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1397   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1398 
1399   Level: advanced
1400 
1401 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1402 @*/
/* See the manual-page comment above for the full contract. */
PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),kaij)); /* same communicator as the AIJ matrix */
  PetscCall(MatSetType(*kaij,MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij,A));        /* takes a reference on A */
  PetscCall(MatKAIJSetS(*kaij,p,q,S));      /* S may be NULL */
  PetscCall(MatKAIJSetT(*kaij,p,q,T));      /* T may be NULL */
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(0);
}
1414 
1415 /*MC
1416   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1417     [I \otimes S + A \otimes T],
1418   where
1419     S is a dense (p \times q) matrix,
1420     T is a dense (p \times q) matrix,
1421     A is an AIJ  (n \times n) matrix,
1422     and I is the identity matrix.
1423   The resulting matrix is (np \times nq).
1424 
1425   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1426 
1427   Notes:
1428   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1429   where x and b are column vectors containing the row-major representations of X and B.
1430 
1431   Level: advanced
1432 
1433 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1434 M*/
1435 
/*
  Constructor registered for MATKAIJ.  Allocates the (shared Seq/MPI) data
  structure and installs the sequential or parallel operation table depending
  on the communicator size.  Note MatSOR is only available in the sequential
  case.
*/
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ    *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(A,&b));   /* zeroed allocation logged against A */
  A->data  = (void*)b;

  /* Start from an empty operation table; only the operations below are supported. */
  PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));

  b->w    = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ));
  }
  /* Operations shared by the sequential and parallel cases */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(0);
}
1475