xref: /petsc/src/mat/impls/kaij/kaij.c (revision 4ffacfe27a72f4cdf51b68a3bbb6aed96040fb2f)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: `MatCreateKAIJ()`
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscBool      ismpikaij,isseqkaij;
50 
51   PetscFunctionBegin;
52   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
53   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(0);
64 }
65 
66 /*@C
67    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
68 
69    Not Collective; the entire S is stored and returned independently on all processes.
70 
71    Input Parameter:
72 .  A - the KAIJ matrix
73 
74    Output Parameters:
75 +  m - the number of rows in S
76 .  n - the number of columns in S
77 -  S - the S matrix, in form of a scalar array in column-major format
78 
79    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
80 
81    Level: advanced
82 
83 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
84 @*/
85 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
86 {
87   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
88   PetscFunctionBegin;
89   if (m) *m = b->p;
90   if (n) *n = b->q;
91   if (S) *S = b->S;
92   PetscFunctionReturn(0);
93 }
94 
95 /*@C
96    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
97 
98    Not Collective; the entire S is stored and returned independently on all processes.
99 
100    Input Parameter:
101 .  A - the KAIJ matrix
102 
103    Output Parameters:
104 +  m - the number of rows in S
105 .  n - the number of columns in S
106 -  S - the S matrix, in form of a scalar array in column-major format
107 
108    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
109 
110    Level: advanced
111 
112 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
113 @*/
114 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
115 {
116   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
117   PetscFunctionBegin;
118   if (m) *m = b->p;
119   if (n) *n = b->q;
120   if (S) *S = b->S;
121   PetscFunctionReturn(0);
122 }
123 
124 /*@C
125   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
126 
127   Not collective
128 
129   Input Parameter:
130 . A - the KAIJ matrix
131 
132   Output Parameter:
133 . S - location of pointer to array obtained with MatKAIJGetS()
134 
135   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
136   If NULL is passed, it will not attempt to zero the array pointer.
137 
138   Level: advanced
139 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
140 @*/
141 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
142 {
143   PetscFunctionBegin;
144   if (S) *S = NULL;
145   PetscCall(PetscObjectStateIncrease((PetscObject)A));
146   PetscFunctionReturn(0);
147 }
148 
149 /*@C
150   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
151 
152   Not collective
153 
154   Input Parameter:
155 . A - the KAIJ matrix
156 
157   Output Parameter:
. S - location of pointer to array obtained with MatKAIJGetSRead()
159 
160   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
161   If NULL is passed, it will not attempt to zero the array pointer.
162 
163   Level: advanced
164 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
165 @*/
166 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
167 {
168   PetscFunctionBegin;
169   if (S) *S = NULL;
170   PetscFunctionReturn(0);
171 }
172 
173 /*@C
174    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
175 
176    Not Collective; the entire T is stored and returned independently on all processes
177 
178    Input Parameter:
179 .  A - the KAIJ matrix
180 
181    Output Parameters:
182 +  m - the number of rows in T
183 .  n - the number of columns in T
184 -  T - the T matrix, in form of a scalar array in column-major format
185 
186    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
187 
188    Level: advanced
189 
190 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
191 @*/
192 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
193 {
194   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
195   PetscFunctionBegin;
196   if (m) *m = b->p;
197   if (n) *n = b->q;
198   if (T) *T = b->T;
199   PetscFunctionReturn(0);
200 }
201 
202 /*@C
203    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
204 
205    Not Collective; the entire T is stored and returned independently on all processes
206 
207    Input Parameter:
208 .  A - the KAIJ matrix
209 
210    Output Parameters:
211 +  m - the number of rows in T
212 .  n - the number of columns in T
213 -  T - the T matrix, in form of a scalar array in column-major format
214 
215    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
216 
217    Level: advanced
218 
219 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
220 @*/
221 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
222 {
223   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
224   PetscFunctionBegin;
225   if (m) *m = b->p;
226   if (n) *n = b->q;
227   if (T) *T = b->T;
228   PetscFunctionReturn(0);
229 }
230 
231 /*@C
232   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
233 
234   Not collective
235 
236   Input Parameter:
237 . A - the KAIJ matrix
238 
239   Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetT()
241 
242   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
243   If NULL is passed, it will not attempt to zero the array pointer.
244 
245   Level: advanced
246 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
247 @*/
248 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
249 {
250   PetscFunctionBegin;
251   if (T) *T = NULL;
252   PetscCall(PetscObjectStateIncrease((PetscObject)A));
253   PetscFunctionReturn(0);
254 }
255 
256 /*@C
257   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
258 
259   Not collective
260 
261   Input Parameter:
262 . A - the KAIJ matrix
263 
264   Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetTRead()
266 
267   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
268   If NULL is passed, it will not attempt to zero the array pointer.
269 
270   Level: advanced
271 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
272 @*/
273 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
274 {
275   PetscFunctionBegin;
276   if (T) *T = NULL;
277   PetscFunctionReturn(0);
278 }
279 
280 /*@
281    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
282 
283    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
284 
285    Input Parameters:
286 +  A - the KAIJ matrix
287 -  B - the AIJ matrix
288 
289    Notes:
290    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
291    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
292 
293    Level: advanced
294 
295 .seealso: `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
296 @*/
297 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
298 {
299   PetscMPIInt    size;
300   PetscBool      flg;
301 
302   PetscFunctionBegin;
303   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
304   if (size == 1) {
305     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg));
306     PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name);
307     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
308     a->AIJ = B;
309   } else {
310     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
311     a->A = B;
312   }
313   PetscCall(PetscObjectReference((PetscObject)B));
314   PetscFunctionReturn(0);
315 }
316 
317 /*@C
318    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
319 
320    Logically Collective; the entire S is stored independently on all processes.
321 
322    Input Parameters:
323 +  A - the KAIJ matrix
324 .  p - the number of rows in S
325 .  q - the number of columns in S
326 -  S - the S matrix, in form of a scalar array in column-major format
327 
328    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
329    The S matrix is copied, so the user can destroy this array.
330 
331    Level: Advanced
332 
333 .seealso: `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
334 @*/
335 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
336 {
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   PetscCall(PetscFree(a->S));
341   if (S) {
342     PetscCall(PetscMalloc1(p*q,&a->S));
343     PetscCall(PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar)));
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
353 
354    Logically Collective.
355 
356    Input Parameter:
357 .  A - the KAIJ matrix
358 
359   Output Parameter:
360 .  identity - the Boolean value
361 
362    Level: Advanced
363 
364 .seealso: `MatKAIJGetS()`, `MatKAIJGetT()`
365 @*/
366 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
367 {
368   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
369   PetscInt    i,j;
370 
371   PetscFunctionBegin;
372   if (a->p != a->q) {
373     *identity = PETSC_FALSE;
374     PetscFunctionReturn(0);
375   } else *identity = PETSC_TRUE;
376   if (!a->isTI || a->S) {
377     for (i=0; i<a->p && *identity; i++) {
378       for (j=0; j<a->p && *identity; j++) {
379         if (i != j) {
380           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
381           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
382         } else {
383           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
384           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
385         }
386       }
387     }
388   }
389   PetscFunctionReturn(0);
390 }
391 
392 /*@C
393    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
394 
395    Logically Collective; the entire T is stored independently on all processes.
396 
397    Input Parameters:
398 +  A - the KAIJ matrix
.  p - the number of rows in T
.  q - the number of columns in T
401 -  T - the T matrix, in form of a scalar array in column-major format
402 
403    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
404    The T matrix is copied, so the user can destroy this array.
405 
406    Level: Advanced
407 
408 .seealso: `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
409 @*/
410 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
411 {
412   PetscInt       i,j;
413   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
414   PetscBool      isTI = PETSC_FALSE;
415 
416   PetscFunctionBegin;
417   /* check if T is an identity matrix */
418   if (T && (p == q)) {
419     isTI = PETSC_TRUE;
420     for (i=0; i<p; i++) {
421       for (j=0; j<q; j++) {
422         if (i == j) {
423           /* diagonal term must be 1 */
424           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
425         } else {
426           /* off-diagonal term must be 0 */
427           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
428         }
429       }
430     }
431   }
432   a->isTI = isTI;
433 
434   PetscCall(PetscFree(a->T));
435   if (T && (!isTI)) {
436     PetscCall(PetscMalloc1(p*q,&a->T));
437     PetscCall(PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar)));
438   } else a->T = NULL;
439 
440   a->p = p;
441   a->q = q;
442   PetscFunctionReturn(0);
443 }
444 
/* Destructor for MATSEQKAIJ: frees the dense blocks, cached data, and the AIJ reference. */
PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  /* Drop the reference to the underlying AIJ matrix taken in MatKAIJSetAIJ(). */
  PetscCall(MatDestroy(&b->AIJ));
  /* Free the dense S and T blocks and the cached inverted block diagonal. */
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  /* Free the SOR work arrays allocated lazily in MatSOR_SeqKAIJ(). */
  PetscCall(PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr));
  /* Remove the composed conversion routine so no dangling function pointer remains. */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
459 
460 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
461 {
462   Mat_MPIKAIJ      *a;
463   Mat_MPIAIJ       *mpiaij;
464   PetscScalar      *T;
465   PetscInt         i,j;
466   PetscObjectState state;
467 
468   PetscFunctionBegin;
469   a = (Mat_MPIKAIJ*)A->data;
470   mpiaij = (Mat_MPIAIJ*)a->A->data;
471 
472   PetscCall(PetscObjectStateGet((PetscObject)a->A,&state));
473   if (state == a->state) {
474     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
475     PetscFunctionReturn(0);
476   } else {
477     PetscCall(MatDestroy(&a->AIJ));
478     PetscCall(MatDestroy(&a->OAIJ));
479     if (a->isTI) {
480       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
481        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
482        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
483        * to pass in. */
484       PetscCall(PetscMalloc1(a->p*a->q,&T));
485       for (i=0; i<a->p; i++) {
486         for (j=0; j<a->q; j++) {
487           if (i==j) T[i+j*a->p] = 1.0;
488           else      T[i+j*a->p] = 0.0;
489         }
490       }
491     } else T = a->T;
492     PetscCall(MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ));
493     PetscCall(MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ));
494     if (a->isTI) PetscCall(PetscFree(T));
495     a->state = state;
496   }
497 
498   PetscFunctionReturn(0);
499 }
500 
/* Lays out the KAIJ matrix (sizes and block sizes) and, in parallel, builds the
 * per-process pieces: the sequential diagonal/off-diagonal KAIJ matrices, the
 * ghost-value work vector, and the scatter context used by MatMult. */
PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt       n;
  PetscMPIInt    size;
  Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* KAIJ dimensions are those of the AIJ matrix scaled by the dense block sizes:
       p for rows and q for columns. */
    PetscCall(MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS          from,to;
    Vec         gvec;

    a = (Mat_MPIKAIJ*)A->data;
    mpiaij = (Mat_MPIAIJ*)a->A->data;
    PetscCall(MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N));
    /* NOTE(review): seqkaij aliases the same A->data as a; p/q appear to be shared
       header fields so these block sizes match a->p/a->q — confirm against kaij.h. */
    PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    /* Build (or rebuild) the sequential KAIJ matrices for the diagonal and
       off-diagonal parts of the parallel AIJ matrix. */
    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Work vector holding the q-sized blocks of the ghosted (off-process) entries;
       its length is the ghost count of the underlying AIJ matrix times q. */
    PetscCall(VecGetSize(mpiaij->lvec,&n));
    PetscCall(VecCreate(PETSC_COMM_SELF,&a->w));
    PetscCall(VecSetSizes(a->w,n*a->q,n*a->q));
    PetscCall(VecSetBlockSize(a->w,a->q));
    PetscCall(VecSetType(a->w,VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec,from,a->w,to,&a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
555 
/* Viewer for KAIJ matrices. For INFO/INFO_DETAIL/IMPL ASCII formats it prints the
 * S and T blocks (entries only under IMPL) and then views the underlying AIJ
 * matrix; for every other format it converts to AIJ and views the conversion. */
PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      /* Entries are stored column-major; printed here as a flat list. */
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of S are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      /* The identity transformation is not stored explicitly (a->T is NULL). */
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of T are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      PetscCall(MatView(b->A,viewer));
    } else {
      PetscCall(MatView(a->AIJ,viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B));
    PetscCall(MatView(B,viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(0);
}
619 
/* Destructor for MATMPIKAIJ: frees the sequential sub-matrices, the scatter
 * machinery, the dense blocks, and the reference to the parallel AIJ matrix. */
PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

  PetscFunctionBegin;
  /* Sequential KAIJ pieces built in MatKAIJ_build_AIJ_OAIJ(). */
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  /* Drop the reference to the parallel AIJ matrix taken in MatKAIJSetAIJ(). */
  PetscCall(MatDestroy(&b->A));
  /* Scatter context and ghost work vector created in MatSetUp_KAIJ(). */
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  /* Remove composed methods so no dangling function pointers remain. */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
638 
639 /* --------------------------------------------------------------------------------------*/
640 
641 /* zz = yy + Axx */
/* Computes zz = yy + A*xx for the sequential KAIJ matrix (zz = A*xx when yy is NULL).
 * Three specializations, chosen per call:
 *   - b->isTI: T is the identity, so the per-nonzero dense multiply collapses to a scaled copy;
 *   - t != NULL: general dense T, full p x q block multiply per AIJ nonzero;
 *   - s != NULL: additionally adds the blockwise shift S*x on the diagonal blocks. */
PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x,*v,*bx;
  PetscScalar       *y,*sums;
  const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
  PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

  PetscFunctionBegin;
  /* Initialize the result: zero it for a plain multiply, or copy in the addend. */
  if (!yy) {
    PetscCall(VecSet(zz,0.0));
  } else {
    PetscCall(VecCopy(yy,zz));
  }
  /* No S, no T, and T is not the identity: the operator is zero, nothing to add. */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);

  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(VecGetArray(zz,&y));
  /* CSR arrays of the underlying AIJ matrix. */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  if (b->isTI) {
    /* T == I: each AIJ nonzero v contributes v * x-block directly (p == q here). */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;              /* output block for row i */
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
        }
      }
    }
    PetscCall(PetscLogFlops(3.0*(a->nz)*p));
  } else if (t) {
    /* General dense T: each AIJ nonzero contributes v * T * x-block (T column-major). */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          for (l=0; l<q; l++) {
            sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
          }
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz));
  }
  if (s) {
    /* Shift part: add S times the corresponding x-block to each row block. */
    for (i=0; i<m; i++) {
      sums = y + p*i;
      bx   = x + q*i;
      /* Guard for rectangular AIJ: only rows with a matching column block get the shift. */
      if (i < b->AIJ->cmap->n) {
        for (j=0; j<q; j++) {
          for (k=0; k<p; k++) {
            sums[k] += s[k+j*p]*bx[j];
          }
        }
      }
    }
    PetscCall(PetscLogFlops(2.0*m*p*q));
  }

  PetscCall(VecRestoreArrayRead(xx,&x));
  PetscCall(VecRestoreArray(zz,&y));
  PetscFunctionReturn(0);
}
716 
717 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
718 {
719   PetscFunctionBegin;
720   PetscCall(MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy));
721   PetscFunctionReturn(0);
722 }
723 
724 #include <petsc/private/kernels/blockinvert.h>
725 
/* Computes (and caches in b->ibdiag) the inverse of each p x p diagonal block
 * of the KAIJ matrix. Block i is S + a_ii * T (with T = I when b->isTI), where
 * a_ii is the diagonal entry of row i of the underlying AIJ matrix. Requires
 * square dense blocks (p == q) and at least one of S, T, or isTI. */
PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
{
  Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *S  = b->S;
  const PetscScalar *T  = b->T;
  const PetscScalar *v  = a->a;
  const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt          i,j,*v_pivots,dof,dof2;
  PetscScalar       *diag,aval,*v_work;

  PetscFunctionBegin;
  PetscCheck(p == q,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof*dof;

  /* Return the cached inverse if it is still valid. */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(0);
  }
  /* Lazily allocate the cache: one dof x dof block per AIJ row. */
  if (!b->ibdiag) {
    PetscCall(PetscMalloc1(dof2*m,&b->ibdiag));
    PetscCall(PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar)));
  }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  /* Scratch space for the in-place dense inversion kernel. */
  PetscCall(PetscMalloc2(dof,&v_work,dof,&v_pivots));
  for (i=0; i<m; i++) {
    /* Start from S (or zero if S is absent). */
    if (S) {
      PetscCall(PetscMemcpy(diag,S,dof2*sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag,dof2*sizeof(PetscScalar)));
    }
    if (b->isTI) {
      /* T == I: add a_ii on the block diagonal only. */
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof; j++) diag[j+dof*j] += aval;
    } else if (T) {
      /* General T: add a_ii * T entrywise. */
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof2; j++) diag[j] += aval*T[j];
    }
    /* Invert the assembled block in place. */
    PetscCall(PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work,v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(0);
}
779 
780 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
781 {
782   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
783 
784   PetscFunctionBegin;
785   *B = kaij->AIJ;
786   PetscFunctionReturn(0);
787 }
788 
/* Converts a (Seq or MPI) KAIJ matrix to plain AIJ. For new matrices it computes
 * exact per-row preallocation counts from the underlying AIJ structure, then fills
 * the result via MatConvert_Basic(). Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX
 * and MAT_INPLACE_MATRIX semantics. */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
  Mat            AIJ,OAIJ,B;
  PetscInt       *d_nnz,*o_nnz = NULL,nz,i,j,m,d;
  const PetscInt p = a->p,q = a->q;
  PetscBool      ismpikaij,missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    /* Locate the sequential AIJ matrices carrying the sparsity pattern:
       diagonal part (AIJ) and, in parallel, off-diagonal part (OAIJ). */
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
    } else {
      AIJ = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,MATAIJ));
    PetscCall(MatGetSize(AIJ,&m,NULL));
    PetscCall(MatMissingDiagonal(AIJ,&missing,&d)); /* assumption that all successive rows will have a missing diagonal */
    /* If no diagonal is missing, or there is no S to place on missing diagonals,
       no extra space is needed: set d past the last row. */
    if (!missing || !a->S) d = m;
    PetscCall(PetscMalloc1(m*p,&d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
      /* Each AIJ nonzero expands to a q-wide block; rows at/after the first missing
         diagonal get q extra slots for the S contribution. */
      for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
    }
    if (OAIJ) {
      /* Parallel case: off-diagonal counts come from OAIJ (no S contribution there). */
      PetscCall(PetscMalloc1(m*p,&o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
        for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B,0,d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  /* Fill B entry-by-entry using the generic conversion path. */
  PetscCall(MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else *newmat = B;
  PetscFunctionReturn(0);
}
840 
841 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
842 {
843   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
844   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
845   const PetscScalar *aa = a->a, *T = kaij->T, *v;
846   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
847   const PetscScalar *b, *xb, *idiag;
848   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
849   PetscInt          i, j, k, i2, bs, bs2, nz;
850 
851   PetscFunctionBegin;
852   its = its*lits;
853   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
854   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
855   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
856   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
857   PetscCheck(p == q,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
858   bs = p;
859   bs2 = bs*bs;
860 
861   if (!m) PetscFunctionReturn(0);
862 
863   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A,NULL));
864   idiag = kaij->ibdiag;
865   diag  = a->diag;
866 
867   if (!kaij->sor.setup) {
868     PetscCall(PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr));
869     kaij->sor.setup = PETSC_TRUE;
870   }
871   y     = kaij->sor.y;
872   w     = kaij->sor.w;
873   work  = kaij->sor.work;
874   t     = kaij->sor.t;
875   arr   = kaij->sor.arr;
876 
877   PetscCall( VecGetArray(xx,&x));
878   PetscCall(VecGetArrayRead(bb,&b));
879 
880   if (flag & SOR_ZERO_INITIAL_GUESS) {
881     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
882       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
883       PetscCall(PetscMemcpy(t,b,bs*sizeof(PetscScalar)));
884       i2     =  bs;
885       idiag  += bs2;
886       for (i=1; i<m; i++) {
887         v  = aa + ai[i];
888         vi = aj + ai[i];
889         nz = diag[i] - ai[i];
890 
891         if (T) {                /* b - T (Arow * x) */
892           PetscCall(PetscMemzero(w,bs*sizeof(PetscScalar)));
893           for (j=0; j<nz; j++) {
894             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
895           }
896           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
897           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
898         } else if (kaij->isTI) {
899           PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
900           for (j=0; j<nz; j++) {
901             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
902           }
903         } else {
904           PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
905         }
906 
907         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
908         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
909 
910         idiag += bs2;
911         i2    += bs;
912       }
913       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
914       PetscCall(PetscLogFlops(1.0*bs2*a->nz));
915       xb = t;
916     } else xb = b;
917     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
918       idiag = kaij->ibdiag+bs2*(m-1);
919       i2    = bs * (m-1);
920       PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
921       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
922       i2    -= bs;
923       idiag -= bs2;
924       for (i=m-2; i>=0; i--) {
925         v  = aa + diag[i] + 1 ;
926         vi = aj + diag[i] + 1;
927         nz = ai[i+1] - diag[i] - 1;
928 
929         if (T) {                /* FIXME: This branch untested */
930           PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
931           /* copy all rows of x that are needed into contiguous space */
932           workt = work;
933           for (j=0; j<nz; j++) {
934             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
935             workt += bs;
936           }
937           arrt = arr;
938           for (j=0; j<nz; j++) {
939             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
940             for (k=0; k<bs2; k++) arrt[k] *= v[j];
941             arrt += bs2;
942           }
943           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
944         } else if (kaij->isTI) {
945           PetscCall(PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar)));
946           for (j=0; j<nz; j++) {
947             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
948           }
949         }
950 
951         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
952         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
953 
954         idiag -= bs2;
955         i2    -= bs;
956       }
957       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
958     }
959     its--;
960   }
961   while (its--) {               /* FIXME: This branch not updated */
962     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
963       i2     =  0;
964       idiag  = kaij->ibdiag;
965       for (i=0; i<m; i++) {
966         PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
967 
968         v  = aa + ai[i];
969         vi = aj + ai[i];
970         nz = diag[i] - ai[i];
971         workt = work;
972         for (j=0; j<nz; j++) {
973           PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
974           workt += bs;
975         }
976         arrt = arr;
977         if (T) {
978           for (j=0; j<nz; j++) {
979             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
980             for (k=0; k<bs2; k++) arrt[k] *= v[j];
981             arrt += bs2;
982           }
983           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
984         } else if (kaij->isTI) {
985           for (j=0; j<nz; j++) {
986             PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
987             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
988             arrt += bs2;
989           }
990           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
991         }
992         PetscCall(PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar)));
993 
994         v  = aa + diag[i] + 1;
995         vi = aj + diag[i] + 1;
996         nz = ai[i+1] - diag[i] - 1;
997         workt = work;
998         for (j=0; j<nz; j++) {
999           PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1000           workt += bs;
1001         }
1002         arrt = arr;
1003         if (T) {
1004           for (j=0; j<nz; j++) {
1005             PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1006             for (k=0; k<bs2; k++) arrt[k] *= v[j];
1007             arrt += bs2;
1008           }
1009           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1010         } else if (kaij->isTI) {
1011           for (j=0; j<nz; j++) {
1012             PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1013             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1014             arrt += bs2;
1015           }
1016           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1017         }
1018 
1019         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1020         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1021 
1022         idiag += bs2;
1023         i2    += bs;
1024       }
1025       xb = t;
1026     }
1027     else xb = b;
1028     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1029       idiag = kaij->ibdiag+bs2*(m-1);
1030       i2    = bs * (m-1);
1031       if (xb == b) {
1032         for (i=m-1; i>=0; i--) {
1033           PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
1034 
1035           v  = aa + ai[i];
1036           vi = aj + ai[i];
1037           nz = diag[i] - ai[i];
1038           workt = work;
1039           for (j=0; j<nz; j++) {
1040             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1041             workt += bs;
1042           }
1043           arrt = arr;
1044           if (T) {
1045             for (j=0; j<nz; j++) {
1046               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1047               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1048               arrt += bs2;
1049             }
1050             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1051           } else if (kaij->isTI) {
1052             for (j=0; j<nz; j++) {
1053               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1054               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1055               arrt += bs2;
1056             }
1057             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1058           }
1059 
1060           v  = aa + diag[i] + 1;
1061           vi = aj + diag[i] + 1;
1062           nz = ai[i+1] - diag[i] - 1;
1063           workt = work;
1064           for (j=0; j<nz; j++) {
1065             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1066             workt += bs;
1067           }
1068           arrt = arr;
1069           if (T) {
1070             for (j=0; j<nz; j++) {
1071               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1072               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1073               arrt += bs2;
1074             }
1075             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1076           } else if (kaij->isTI) {
1077             for (j=0; j<nz; j++) {
1078               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1079               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1080               arrt += bs2;
1081             }
1082             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1083           }
1084 
1085           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1086           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1087         }
1088       } else {
1089         for (i=m-1; i>=0; i--) {
1090           PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
1091           v  = aa + diag[i] + 1;
1092           vi = aj + diag[i] + 1;
1093           nz = ai[i+1] - diag[i] - 1;
1094           workt = work;
1095           for (j=0; j<nz; j++) {
1096             PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1097             workt += bs;
1098           }
1099           arrt = arr;
1100           if (T) {
1101             for (j=0; j<nz; j++) {
1102               PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1103               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1104               arrt += bs2;
1105             }
1106             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1107           } else if (kaij->isTI) {
1108             for (j=0; j<nz; j++) {
1109               PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1110               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1111               arrt += bs2;
1112             }
1113             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1114           }
1115           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1116           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1117         }
1118       }
1119       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
1120     }
1121   }
1122 
1123   PetscCall(VecRestoreArray(xx,&x));
1124   PetscCall(VecRestoreArrayRead(bb,&b));
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 /*===================================================================================*/
1129 
/*
  MatMultAdd_MPIKAIJ - Computes zz = A*xx + yy for a parallel KAIJ matrix.
  If yy is NULL, computes zz = A*xx instead.

  The off-process entries of xx are scattered into the work vector b->w while
  the on-diagonal (sequential KAIJ) block is applied, overlapping communication
  with computation; the statement order here is therefore significant.
*/
PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

  PetscFunctionBegin;
  if (!yy) {
    PetscCall(VecSet(zz,0.0));  /* no additive term: start accumulation from zero */
  } else {
    PetscCall(VecCopy(yy,zz));  /* zz <- yy; products are accumulated onto it below */
  }
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
  /* on-diagonal block acts on the local part of xx while the scatter is in flight */
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz));
  PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
  /* off-diagonal block acts on the gathered ghost values in b->w */
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
  PetscFunctionReturn(0);
}
1147 }
1148 
1149 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1150 {
1151   PetscFunctionBegin;
1152   PetscCall(MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy));
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1157 {
1158   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1159 
1160   PetscFunctionBegin;
1161   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1162   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values));
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 /* ----------------------------------------------------------------*/
1167 
1168 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1169 {
1170   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1171   PetscErrorCode  diag = PETSC_FALSE;
1172   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1173   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1174 
1175   PetscFunctionBegin;
1176   PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1177   b->getrowactive = PETSC_TRUE;
1178   PetscCheck(row >= 0 && row < A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row);
1179 
1180   if ((!S) && (!T) && (!b->isTI)) {
1181     if (ncols)    *ncols  = 0;
1182     if (cols)     *cols   = NULL;
1183     if (values)   *values = NULL;
1184     PetscFunctionReturn(0);
1185   }
1186 
1187   if (T || b->isTI) {
1188     PetscCall(MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij));
1189     c     = nzaij;
1190     for (i=0; i<nzaij; i++) {
1191       /* check if this row contains a diagonal entry */
1192       if (colsaij[i] == r) {
1193         diag = PETSC_TRUE;
1194         c = i;
1195       }
1196     }
1197   } else nzaij = c = 0;
1198 
1199   /* calculate size of row */
1200   nz = 0;
1201   if (S)            nz += q;
1202   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1203 
1204   if (cols || values) {
1205     PetscCall(PetscMalloc2(nz,&idx,nz,&v));
1206     for (i=0; i<q; i++) {
1207       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1208       v[i] = 0.0;
1209     }
1210     if (b->isTI) {
1211       for (i=0; i<nzaij; i++) {
1212         for (j=0; j<q; j++) {
1213           idx[i*q+j] = colsaij[i]*q+j;
1214           v[i*q+j]   = (j==s ? vaij[i] : 0);
1215         }
1216       }
1217     } else if (T) {
1218       for (i=0; i<nzaij; i++) {
1219         for (j=0; j<q; j++) {
1220           idx[i*q+j] = colsaij[i]*q+j;
1221           v[i*q+j]   = vaij[i]*T[s+j*p];
1222         }
1223       }
1224     }
1225     if (S) {
1226       for (j=0; j<q; j++) {
1227         idx[c*q+j] = r*q+j;
1228         v[c*q+j]  += S[s+j*p];
1229       }
1230     }
1231   }
1232 
1233   if (ncols)    *ncols  = nz;
1234   if (cols)     *cols   = idx;
1235   if (values)   *values = v;
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1240 {
1241   PetscFunctionBegin;
1242   if (nz) *nz = 0;
1243   PetscCall(PetscFree2(*idx,*v));
1244   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1249 {
1250   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1251   Mat             AIJ     = b->A;
1252   PetscBool       diag    = PETSC_FALSE;
1253   Mat             MatAIJ,MatOAIJ;
1254   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1255   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1256   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1257 
1258   PetscFunctionBegin;
1259   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1260   MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1261   MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1262   PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1263   b->getrowactive = PETSC_TRUE;
1264   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1265   lrow = row - rstart;
1266 
1267   if ((!S) && (!T) && (!b->isTI)) {
1268     if (ncols)    *ncols  = 0;
1269     if (cols)     *cols   = NULL;
1270     if (values)   *values = NULL;
1271     PetscFunctionReturn(0);
1272   }
1273 
1274   r = lrow/p;
1275   s = lrow%p;
1276 
1277   if (T || b->isTI) {
1278     PetscCall(MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray));
1279     PetscCall(MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals));
1280     PetscCall(MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals));
1281 
1282     c     = ncolsaij + ncolsoaij;
1283     for (i=0; i<ncolsaij; i++) {
1284       /* check if this row contains a diagonal entry */
1285       if (colsaij[i] == r) {
1286         diag = PETSC_TRUE;
1287         c = i;
1288       }
1289     }
1290   } else c = 0;
1291 
1292   /* calculate size of row */
1293   nz = 0;
1294   if (S)            nz += q;
1295   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1296 
1297   if (cols || values) {
1298     PetscCall(PetscMalloc2(nz,&idx,nz,&v));
1299     for (i=0; i<q; i++) {
1300       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1301       v[i] = 0.0;
1302     }
1303     if (b->isTI) {
1304       for (i=0; i<ncolsaij; i++) {
1305         for (j=0; j<q; j++) {
1306           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1307           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1308         }
1309       }
1310       for (i=0; i<ncolsoaij; i++) {
1311         for (j=0; j<q; j++) {
1312           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1313           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1314         }
1315       }
1316     } else if (T) {
1317       for (i=0; i<ncolsaij; i++) {
1318         for (j=0; j<q; j++) {
1319           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1320           v[i*q+j]   = vals[i]*T[s+j*p];
1321         }
1322       }
1323       for (i=0; i<ncolsoaij; i++) {
1324         for (j=0; j<q; j++) {
1325           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1326           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1327         }
1328       }
1329     }
1330     if (S) {
1331       for (j=0; j<q; j++) {
1332         idx[c*q+j] = (r+rstart/p)*q+j;
1333         v[c*q+j]  += S[s+j*p];
1334       }
1335     }
1336   }
1337 
1338   if (ncols)  *ncols  = nz;
1339   if (cols)   *cols   = idx;
1340   if (values) *values = v;
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1345 {
1346   PetscFunctionBegin;
1347   PetscCall(PetscFree2(*idx,*v));
1348   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1353 {
1354   Mat            A;
1355 
1356   PetscFunctionBegin;
1357   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
1358   PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
1359   PetscCall(MatDestroy(&A));
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 /* ---------------------------------------------------------------------------------- */
1364 /*@C
1365   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1366 
1367     [I \otimes S + A \otimes T]
1368 
1369   where
1370     S is a dense (p \times q) matrix
1371     T is a dense (p \times q) matrix
1372     A is an AIJ  (n \times n) matrix
1373     I is the identity matrix
1374   The resulting matrix is (np \times nq)
1375 
1376   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1377 
1378   Collective
1379 
1380   Input Parameters:
1381 + A - the AIJ matrix
1382 . p - number of rows in S and T
1383 . q - number of columns in S and T
. S - the S matrix (can be NULL), stored as a PetscScalar array (column-major)
- T - the T matrix (can be NULL), stored as a PetscScalar array (column-major)
1386 
1387   Output Parameter:
1388 . kaij - the new KAIJ matrix
1389 
1390   Notes:
1391   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1392   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1393 
1394   Developer Notes:
1395   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1396   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1397   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1398   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1399 
1400   Level: advanced
1401 
1402 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1403 @*/
PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
{
  PetscFunctionBegin;
  /* Create the KAIJ shell on the same communicator as the defining AIJ matrix;
     MatSetType() selects the seq/MPI variant based on the communicator size. */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),kaij));
  PetscCall(MatSetType(*kaij,MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij,A));   /* increases the reference count on A (see header comment) */
  PetscCall(MatKAIJSetS(*kaij,p,q,S)); /* S may be NULL */
  PetscCall(MatKAIJSetT(*kaij,p,q,T)); /* T may be NULL */
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(0);
}
1415 
1416 /*MC
1417   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1418     [I \otimes S + A \otimes T],
1419   where
1420     S is a dense (p \times q) matrix,
1421     T is a dense (p \times q) matrix,
1422     A is an AIJ  (n \times n) matrix,
1423     and I is the identity matrix.
1424   The resulting matrix is (np \times nq).
1425 
1426   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1427 
1428   Notes:
1429   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1430   where x and b are column vectors containing the row-major representations of X and B.
1431 
1432   Level: advanced
1433 
1434 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1435 M*/
1436 
/*
  MatCreate_KAIJ - Registered constructor for MATKAIJ. Allocates the data
  structure and wires the operation table, choosing the sequential or parallel
  implementation based on the communicator size.
*/
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ    *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Mat_MPIKAIJ is allocated in both cases; the seq code uses its leading fields. */
  PetscCall(PetscNewLog(A,&b));
  A->data  = (void*)b;

  /* Start from an empty operation table; only the ops set below are supported. */
  PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));

  b->w    = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* single-process: sequential KAIJ implementation */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;   /* SOR only available sequentially */
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ));
  } else {
    /* multi-process: MPI KAIJ implementation */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ));
  }
  /* operations shared by both variants */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(0);
}
1476