xref: /petsc/src/mat/impls/kaij/kaij.c (revision 2fa40bb9206b96114faa7cb222621ec184d31cd2)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscErrorCode ierr;
145 
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154 
155   Not collective
156 
157   Input Parameter:
158 . A - the KAIJ matrix
159 
160   Output Parameter:
161 . S - location of pointer to array obtained with MatKAIJGetS()
162 
163   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If NULL is passed, it will not attempt to zero the array pointer.
165 
166   Level: advanced
167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(0);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
178 
179    Not Collective; the entire T is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the KAIJ matrix
183 
184    Output Parameters:
185 +  m - the number of rows in T
186 .  n - the number of columns in T
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
190 
191    Level: advanced
192 
193 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207 
208    Not Collective; the entire T is stored and returned independently on all processes
209 
210    Input Parameter:
211 .  A - the KAIJ matrix
212 
213    Output Parameters:
214 +  m - the number of rows in T
215 .  n - the number of columns in T
216 -  T - the T matrix, in form of a scalar array in column-major format
217 
218    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219 
220    Level: advanced
221 
222 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223 @*/
224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225 {
226   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227   PetscFunctionBegin;
228   if (m) *m = b->p;
229   if (n) *n = b->q;
230   if (T) *T = b->T;
231   PetscFunctionReturn(0);
232 }
233 
234 /*@C
235   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236 
237   Not collective
238 
239   Input Parameter:
240 . A - the KAIJ matrix
241 
242   Output Parameter:
243 . T - location of pointer to array obtained with MatKAIJGetS()
244 
245   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246   If NULL is passed, it will not attempt to zero the array pointer.
247 
248   Level: advanced
249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263 
264   Not collective
265 
266   Input Parameter:
267 . A - the KAIJ matrix
268 
269   Output Parameter:
270 . T - location of pointer to array obtained with MatKAIJGetS()
271 
272   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If NULL is passed, it will not attempt to zero the array pointer.
274 
275   Level: advanced
276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(0);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
287 
288    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
289 
290    Input Parameters:
291 +  A - the KAIJ matrix
292 -  B - the AIJ matrix
293 
294    Notes:
295    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
297 
298    Level: advanced
299 
300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303 {
304   PetscErrorCode ierr;
305   PetscMPIInt    size;
306 
307   PetscFunctionBegin;
308   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
309   if (size == 1) {
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311     a->AIJ = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314     a->A = B;
315   }
316   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the KAIJ matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332    The S matrix is copied, so the user can destroy this array.
333 
334    Level: Advanced
335 
336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337 @*/
338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339 {
340   PetscErrorCode ierr;
341   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = PetscFree(a->S);CHKERRQ(ierr);
345   if (S) {
346     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
347     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
348   } else  a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
357 
358    Logically Collective.
359 
360    Input Parameter:
361 .  A - the KAIJ matrix
362 
363   Output Parameter:
364 .  identity - the Boolean value
365 
366    Level: Advanced
367 
368 .seealso: MatKAIJGetS(), MatKAIJGetT()
369 @*/
370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371 {
372   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373   PetscInt    i,j;
374 
375   PetscFunctionBegin;
376   if (a->p != a->q) {
377     *identity = PETSC_FALSE;
378     PetscFunctionReturn(0);
379   } else *identity = PETSC_TRUE;
380   if (!a->isTI || a->S) {
381     for (i=0; i<a->p && *identity; i++) {
382       for (j=0; j<a->p && *identity; j++) {
383         if (i != j) {
384           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386         } else {
387           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389         }
390       }
391     }
392   }
393   PetscFunctionReturn(0);
394 }
395 
396 /*@C
397    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
398 
399    Logically Collective; the entire T is stored independently on all processes.
400 
401    Input Parameters:
402 +  A - the KAIJ matrix
403 .  p - the number of rows in S
404 .  q - the number of columns in S
405 -  T - the T matrix, in form of a scalar array in column-major format
406 
407    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408    The T matrix is copied, so the user can destroy this array.
409 
410    Level: Advanced
411 
412 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413 @*/
414 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415 {
416   PetscErrorCode ierr;
417   PetscInt       i,j;
418   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
419   PetscBool      isTI = PETSC_FALSE;
420 
421   PetscFunctionBegin;
422   /* check if T is an identity matrix */
423   if (T && (p == q)) {
424     isTI = PETSC_TRUE;
425     for (i=0; i<p; i++) {
426       for (j=0; j<q; j++) {
427         if (i == j) {
428           /* diagonal term must be 1 */
429           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430         } else {
431           /* off-diagonal term must be 0 */
432           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433         }
434       }
435     }
436   }
437   a->isTI = isTI;
438 
439   ierr = PetscFree(a->T);CHKERRQ(ierr);
440   if (T && (!isTI)) {
441     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
442     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
443   } else a->T = NULL;
444 
445   a->p = p;
446   a->q = q;
447   PetscFunctionReturn(0);
448 }
449 
450 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451 {
452   PetscErrorCode ierr;
453   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
454 
455   PetscFunctionBegin;
456   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
457   ierr = PetscFree(b->S);CHKERRQ(ierr);
458   ierr = PetscFree(b->T);CHKERRQ(ierr);
459   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
460   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
461   ierr = PetscFree(A->data);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
466 {
467   PetscErrorCode   ierr;
468   Mat_MPIKAIJ      *a;
469   Mat_MPIAIJ       *mpiaij;
470   PetscScalar      *T;
471   PetscInt         i,j;
472   PetscObjectState state;
473 
474   PetscFunctionBegin;
475   a = (Mat_MPIKAIJ*)A->data;
476   mpiaij = (Mat_MPIAIJ*)a->A->data;
477 
478   ierr = PetscObjectStateGet((PetscObject)a->A,&state);CHKERRQ(ierr);
479   if (state == a->state) {
480     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
481     PetscFunctionReturn(0);
482   } else {
483     ierr = MatDestroy(&a->AIJ);CHKERRQ(ierr);
484     ierr = MatDestroy(&a->OAIJ);CHKERRQ(ierr);
485     if (a->isTI) {
486       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
487        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
488        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
489        * to pass in. */
490       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
491       for (i=0; i<a->p; i++) {
492         for (j=0; j<a->q; j++) {
493           if (i==j) T[i+j*a->p] = 1.0;
494           else      T[i+j*a->p] = 0.0;
495         }
496       }
497     } else T = a->T;
498     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
499     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
500     if (a->isTI) {
501       ierr = PetscFree(T);CHKERRQ(ierr);
502     }
503     a->state = state;
504   }
505 
506   PetscFunctionReturn(0);
507 }
508 
509 PetscErrorCode MatSetUp_KAIJ(Mat A)
510 {
511   PetscErrorCode ierr;
512   PetscInt       n;
513   PetscMPIInt    size;
514   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
515 
516   PetscFunctionBegin;
517   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
518   if (size == 1) {
519     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
520     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
521     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
522     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
523     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
524   } else {
525     Mat_MPIKAIJ *a;
526     Mat_MPIAIJ  *mpiaij;
527     IS          from,to;
528     Vec         gvec;
529 
530     a = (Mat_MPIKAIJ*)A->data;
531     mpiaij = (Mat_MPIAIJ*)a->A->data;
532     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
533     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
534     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
535     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
536     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
537 
538     ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr);
539 
540     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
541     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
542     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
543     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
544     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
545 
546     /* create two temporary Index sets for build scatter gather */
547     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
548     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
549 
550     /* create temporary global vector to generate scatter context */
551     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
552 
553     /* generate the scatter context */
554     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
555 
556     ierr = ISDestroy(&from);CHKERRQ(ierr);
557     ierr = ISDestroy(&to);CHKERRQ(ierr);
558     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
559   }
560 
561   A->assembled = PETSC_TRUE;
562   PetscFunctionReturn(0);
563 }
564 
565 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
566 {
567   PetscViewerFormat format;
568   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
569   Mat               B;
570   PetscInt          i;
571   PetscErrorCode    ierr;
572   PetscBool         ismpikaij;
573 
574   PetscFunctionBegin;
575   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
576   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
577   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
578     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
579 
580     /* Print appropriate details for S. */
581     if (!a->S) {
582       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
583     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
584       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
585       for (i=0; i<(a->p * a->q); i++) {
586 #if defined(PETSC_USE_COMPLEX)
587         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
588 #else
589         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
590 #endif
591       }
592       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
593     }
594 
595     /* Print appropriate details for T. */
596     if (a->isTI) {
597       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
598     } else if (!a->T) {
599       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
600     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
601       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
602       for (i=0; i<(a->p * a->q); i++) {
603 #if defined(PETSC_USE_COMPLEX)
604         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
605 #else
606         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
607 #endif
608       }
609       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
610     }
611 
612     /* Now print details for the AIJ matrix, using the AIJ viewer. */
613     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
614     if (ismpikaij) {
615       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
616       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
617     } else {
618       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
619     }
620 
621   } else {
622     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
623     if (ismpikaij) {
624       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
625     } else {
626       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
627     }
628     ierr = MatView(B,viewer);CHKERRQ(ierr);
629     ierr = MatDestroy(&B);CHKERRQ(ierr);
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
635 {
636   PetscErrorCode ierr;
637   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
638 
639   PetscFunctionBegin;
640   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
641   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
642   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
643   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
644   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
645   ierr = PetscFree(b->S);CHKERRQ(ierr);
646   ierr = PetscFree(b->T);CHKERRQ(ierr);
647   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
648   ierr = PetscFree(A->data);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 
652 /* --------------------------------------------------------------------------------------*/
653 
654 /* zz = yy + Axx */
655 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
656 {
657   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
658   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
659   const PetscScalar *s = b->S, *t = b->T;
660   const PetscScalar *x,*v,*bx;
661   PetscScalar       *y,*sums;
662   PetscErrorCode    ierr;
663   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
664   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
665 
666   PetscFunctionBegin;
667   if (!yy) {
668     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
669   } else {
670     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
671   }
672   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
673 
674   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
675   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
676   idx  = a->j;
677   v    = a->a;
678   ii   = a->i;
679 
680   if (b->isTI) {
681     for (i=0; i<m; i++) {
682       jrow = ii[i];
683       n    = ii[i+1] - jrow;
684       sums = y + p*i;
685       for (j=0; j<n; j++) {
686         for (k=0; k<p; k++) {
687           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
688         }
689       }
690     }
691     ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr);
692   } else if (t) {
693     for (i=0; i<m; i++) {
694       jrow = ii[i];
695       n    = ii[i+1] - jrow;
696       sums = y + p*i;
697       for (j=0; j<n; j++) {
698         for (k=0; k<p; k++) {
699           for (l=0; l<q; l++) {
700             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
701           }
702         }
703       }
704     }
705     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
706      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
707      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
708      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
709     ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr);
710   }
711   if (s) {
712     for (i=0; i<m; i++) {
713       sums = y + p*i;
714       bx   = x + q*i;
715       if (i < b->AIJ->cmap->n) {
716         for (j=0; j<q; j++) {
717           for (k=0; k<p; k++) {
718             sums[k] += s[k+j*p]*bx[j];
719           }
720         }
721       }
722     }
723     ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr);
724   }
725 
726   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
727   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
732 {
733   PetscErrorCode ierr;
734   PetscFunctionBegin;
735   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #include <petsc/private/kernels/blockinvert.h>
740 
741 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
742 {
743   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
744   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
745   const PetscScalar *S  = b->S;
746   const PetscScalar *T  = b->T;
747   const PetscScalar *v  = a->a;
748   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
749   PetscErrorCode    ierr;
750   PetscInt          i,j,*v_pivots,dof,dof2;
751   PetscScalar       *diag,aval,*v_work;
752 
753   PetscFunctionBegin;
754   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
755   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
756 
757   dof  = p;
758   dof2 = dof*dof;
759 
760   if (b->ibdiagvalid) {
761     if (values) *values = b->ibdiag;
762     PetscFunctionReturn(0);
763   }
764   if (!b->ibdiag) {
765     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
766     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
767   }
768   if (values) *values = b->ibdiag;
769   diag = b->ibdiag;
770 
771   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
772   for (i=0; i<m; i++) {
773     if (S) {
774       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
775     } else {
776       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
777     }
778     if (b->isTI) {
779       aval = 0;
780       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
781       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
782     } else if (T) {
783       aval = 0;
784       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
785       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
786     }
787     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
788     diag += dof2;
789   }
790   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
791 
792   b->ibdiagvalid = PETSC_TRUE;
793   PetscFunctionReturn(0);
794 }
795 
796 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
797 {
798   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
799 
800   PetscFunctionBegin;
801   *B = kaij->AIJ;
802   PetscFunctionReturn(0);
803 }
804 
805 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
806 {
807   PetscErrorCode    ierr;
808   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
809   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
810   const PetscScalar *aa = a->a, *T = kaij->T, *v;
811   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
812   const PetscScalar *b, *xb, *idiag;
813   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
814   PetscInt          i, j, k, i2, bs, bs2, nz;
815 
816   PetscFunctionBegin;
817   its = its*lits;
818   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
819   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
821   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
822   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
823   else        {bs = p; bs2 = bs*bs; }
824 
825   if (!m) PetscFunctionReturn(0);
826 
827   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
828   idiag = kaij->ibdiag;
829   diag  = a->diag;
830 
831   if (!kaij->sor.setup) {
832     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
833     kaij->sor.setup = PETSC_TRUE;
834   }
835   y     = kaij->sor.y;
836   w     = kaij->sor.w;
837   work  = kaij->sor.work;
838   t     = kaij->sor.t;
839   arr   = kaij->sor.arr;
840 
841   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
842   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
843 
844   if (flag & SOR_ZERO_INITIAL_GUESS) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
847       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
848       i2     =  bs;
849       idiag  += bs2;
850       for (i=1; i<m; i++) {
851         v  = aa + ai[i];
852         vi = aj + ai[i];
853         nz = diag[i] - ai[i];
854 
855         if (T) {                /* b - T (Arow * x) */
856           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
857           for (j=0; j<nz; j++) {
858             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
859           }
860           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
861           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
862         } else if (kaij->isTI) {
863           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
864           for (j=0; j<nz; j++) {
865             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
866           }
867         } else {
868           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
869         }
870 
871         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
872         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
873 
874         idiag += bs2;
875         i2    += bs;
876       }
877       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
878       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
879       xb = t;
880     } else xb = b;
881     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882       idiag = kaij->ibdiag+bs2*(m-1);
883       i2    = bs * (m-1);
884       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
886       i2    -= bs;
887       idiag -= bs2;
888       for (i=m-2; i>=0; i--) {
889         v  = aa + diag[i] + 1 ;
890         vi = aj + diag[i] + 1;
891         nz = ai[i+1] - diag[i] - 1;
892 
893         if (T) {                /* FIXME: This branch untested */
894           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
895           /* copy all rows of x that are needed into contiguous space */
896           workt = work;
897           for (j=0; j<nz; j++) {
898             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
899             workt += bs;
900           }
901           arrt = arr;
902           for (j=0; j<nz; j++) {
903             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
904             for (k=0; k<bs2; k++) arrt[k] *= v[j];
905             arrt += bs2;
906           }
907           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
908         } else if (kaij->isTI) {
909           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
910           for (j=0; j<nz; j++) {
911             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
912           }
913         }
914 
915         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
916         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
917 
918         idiag -= bs2;
919         i2    -= bs;
920       }
921       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
922     }
923     its--;
924   }
925   while (its--) {               /* FIXME: This branch not updated */
926     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
927       i2     =  0;
928       idiag  = kaij->ibdiag;
929       for (i=0; i<m; i++) {
930         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
931 
932         v  = aa + ai[i];
933         vi = aj + ai[i];
934         nz = diag[i] - ai[i];
935         workt = work;
936         for (j=0; j<nz; j++) {
937           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
938           workt += bs;
939         }
940         arrt = arr;
941         if (T) {
942           for (j=0; j<nz; j++) {
943             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
944             for (k=0; k<bs2; k++) arrt[k] *= v[j];
945             arrt += bs2;
946           }
947           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
948         } else if (kaij->isTI) {
949           for (j=0; j<nz; j++) {
950             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
951             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
952             arrt += bs2;
953           }
954           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
955         }
956         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
957 
958         v  = aa + diag[i] + 1;
959         vi = aj + diag[i] + 1;
960         nz = ai[i+1] - diag[i] - 1;
961         workt = work;
962         for (j=0; j<nz; j++) {
963           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
964           workt += bs;
965         }
966         arrt = arr;
967         if (T) {
968           for (j=0; j<nz; j++) {
969             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
970             for (k=0; k<bs2; k++) arrt[k] *= v[j];
971             arrt += bs2;
972           }
973           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
974         } else if (kaij->isTI) {
975           for (j=0; j<nz; j++) {
976             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
977             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
978             arrt += bs2;
979           }
980           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
981         }
982 
983         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
984         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
985 
986         idiag += bs2;
987         i2    += bs;
988       }
989       xb = t;
990     }
991     else xb = b;
992     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
993       idiag = kaij->ibdiag+bs2*(m-1);
994       i2    = bs * (m-1);
995       if (xb == b) {
996         for (i=m-1; i>=0; i--) {
997           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
998 
999           v  = aa + ai[i];
1000           vi = aj + ai[i];
1001           nz = diag[i] - ai[i];
1002           workt = work;
1003           for (j=0; j<nz; j++) {
1004             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1005             workt += bs;
1006           }
1007           arrt = arr;
1008           if (T) {
1009             for (j=0; j<nz; j++) {
1010               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1011               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1012               arrt += bs2;
1013             }
1014             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1015           } else if (kaij->isTI) {
1016             for (j=0; j<nz; j++) {
1017               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1018               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1019               arrt += bs2;
1020             }
1021             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1022           }
1023 
1024           v  = aa + diag[i] + 1;
1025           vi = aj + diag[i] + 1;
1026           nz = ai[i+1] - diag[i] - 1;
1027           workt = work;
1028           for (j=0; j<nz; j++) {
1029             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1030             workt += bs;
1031           }
1032           arrt = arr;
1033           if (T) {
1034             for (j=0; j<nz; j++) {
1035               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1036               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1037               arrt += bs2;
1038             }
1039             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1040           } else if (kaij->isTI) {
1041             for (j=0; j<nz; j++) {
1042               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1043               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1044               arrt += bs2;
1045             }
1046             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1047           }
1048 
1049           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1050           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1051         }
1052       } else {
1053         for (i=m-1; i>=0; i--) {
1054           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1055           v  = aa + diag[i] + 1;
1056           vi = aj + diag[i] + 1;
1057           nz = ai[i+1] - diag[i] - 1;
1058           workt = work;
1059           for (j=0; j<nz; j++) {
1060             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1061             workt += bs;
1062           }
1063           arrt = arr;
1064           if (T) {
1065             for (j=0; j<nz; j++) {
1066               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1067               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1068               arrt += bs2;
1069             }
1070             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1071           } else if (kaij->isTI) {
1072             for (j=0; j<nz; j++) {
1073               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1074               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1075               arrt += bs2;
1076             }
1077             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1078           }
1079           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1080           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1081         }
1082       }
1083       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1084     }
1085   }
1086 
1087   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1088   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /*===================================================================================*/
1093 
1094 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1095 {
1096   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100   if (!yy) {
1101     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1102   } else {
1103     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1104   }
1105   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */
1106   /* start the scatter */
1107   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1109   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1115 {
1116   PetscErrorCode ierr;
1117   PetscFunctionBegin;
1118   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1123 {
1124   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1125   PetscErrorCode  ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ is up to date. */
1129   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 /* ----------------------------------------------------------------*/
1134 
1135 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1136 {
1137   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1138   PetscErrorCode  diag = PETSC_FALSE;
1139   PetscErrorCode  ierr;
1140   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1141   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1142 
1143   PetscFunctionBegin;
1144   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1145   b->getrowactive = PETSC_TRUE;
1146   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1147 
1148   if ((!S) && (!T) && (!b->isTI)) {
1149     if (ncols)    *ncols  = 0;
1150     if (cols)     *cols   = NULL;
1151     if (values)   *values = NULL;
1152     PetscFunctionReturn(0);
1153   }
1154 
1155   if (T || b->isTI) {
1156     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1157     c     = nzaij;
1158     for (i=0; i<nzaij; i++) {
1159       /* check if this row contains a diagonal entry */
1160       if (colsaij[i] == r) {
1161         diag = PETSC_TRUE;
1162         c = i;
1163       }
1164     }
1165   } else nzaij = c = 0;
1166 
1167   /* calculate size of row */
1168   nz = 0;
1169   if (S)            nz += q;
1170   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1171 
1172   if (cols || values) {
1173     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1174     for (i=0; i<q; i++) {
1175       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1176       v[i] = 0.0;
1177     }
1178     if (b->isTI) {
1179       for (i=0; i<nzaij; i++) {
1180         for (j=0; j<q; j++) {
1181           idx[i*q+j] = colsaij[i]*q+j;
1182           v[i*q+j]   = (j==s ? vaij[i] : 0);
1183         }
1184       }
1185     } else if (T) {
1186       for (i=0; i<nzaij; i++) {
1187         for (j=0; j<q; j++) {
1188           idx[i*q+j] = colsaij[i]*q+j;
1189           v[i*q+j]   = vaij[i]*T[s+j*p];
1190         }
1191       }
1192     }
1193     if (S) {
1194       for (j=0; j<q; j++) {
1195         idx[c*q+j] = r*q+j;
1196         v[c*q+j]  += S[s+j*p];
1197       }
1198     }
1199   }
1200 
1201   if (ncols)    *ncols  = nz;
1202   if (cols)     *cols   = idx;
1203   if (values)   *values = v;
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1208 {
1209   PetscErrorCode ierr;
1210 
1211   PetscFunctionBegin;
1212   if (nz) *nz = 0;
1213   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1214   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1219 {
1220   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1221   Mat             AIJ     = b->A;
1222   PetscBool       diag    = PETSC_FALSE;
1223   Mat             MatAIJ,MatOAIJ;
1224   PetscErrorCode  ierr;
1225   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1226   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1227   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1228 
1229   PetscFunctionBegin;
1230   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */
1231   MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1232   MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1233   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1234   b->getrowactive = PETSC_TRUE;
1235   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1236   lrow = row - rstart;
1237 
1238   if ((!S) && (!T) && (!b->isTI)) {
1239     if (ncols)    *ncols  = 0;
1240     if (cols)     *cols   = NULL;
1241     if (values)   *values = NULL;
1242     PetscFunctionReturn(0);
1243   }
1244 
1245   r = lrow/p;
1246   s = lrow%p;
1247 
1248   if (T || b->isTI) {
1249     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
1250     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1251     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1252 
1253     c     = ncolsaij + ncolsoaij;
1254     for (i=0; i<ncolsaij; i++) {
1255       /* check if this row contains a diagonal entry */
1256       if (colsaij[i] == r) {
1257         diag = PETSC_TRUE;
1258         c = i;
1259       }
1260     }
1261   } else c = 0;
1262 
1263   /* calculate size of row */
1264   nz = 0;
1265   if (S)            nz += q;
1266   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1267 
1268   if (cols || values) {
1269     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1270     for (i=0; i<q; i++) {
1271       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1272       v[i] = 0.0;
1273     }
1274     if (b->isTI) {
1275       for (i=0; i<ncolsaij; i++) {
1276         for (j=0; j<q; j++) {
1277           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1278           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1279         }
1280       }
1281       for (i=0; i<ncolsoaij; i++) {
1282         for (j=0; j<q; j++) {
1283           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1284           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1285         }
1286       }
1287     } else if (T) {
1288       for (i=0; i<ncolsaij; i++) {
1289         for (j=0; j<q; j++) {
1290           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1291           v[i*q+j]   = vals[i]*T[s+j*p];
1292         }
1293       }
1294       for (i=0; i<ncolsoaij; i++) {
1295         for (j=0; j<q; j++) {
1296           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1297           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1298         }
1299       }
1300     }
1301     if (S) {
1302       for (j=0; j<q; j++) {
1303         idx[c*q+j] = (r+rstart/p)*q+j;
1304         v[c*q+j]  += S[s+j*p];
1305       }
1306     }
1307   }
1308 
1309   if (ncols)  *ncols  = nz;
1310   if (cols)   *cols   = idx;
1311   if (values) *values = v;
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1316 {
1317   PetscErrorCode ierr;
1318   PetscFunctionBegin;
1319   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1320   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1325 {
1326   PetscErrorCode ierr;
1327   Mat            A;
1328 
1329   PetscFunctionBegin;
1330   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1331   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1332   ierr = MatDestroy(&A);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 /* ---------------------------------------------------------------------------------- */
1337 /*@C
1338   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1339 
1340     [I \otimes S + A \otimes T]
1341 
1342   where
1343     S is a dense (p \times q) matrix
1344     T is a dense (p \times q) matrix
1345     A is an AIJ  (n \times n) matrix
1346     I is the identity matrix
1347   The resulting matrix is (np \times nq)
1348 
1349   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1350 
1351   Collective
1352 
1353   Input Parameters:
1354 + A - the AIJ matrix
1355 . p - number of rows in S and T
1356 . q - number of columns in S and T
1357 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1358 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1359 
1360   Output Parameter:
1361 . kaij - the new KAIJ matrix
1362 
1363   Notes:
1364   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1365   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1366 
1367   Developer Notes:
1368   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1369   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1370   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1371   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1372 
1373   Level: advanced
1374 
1375 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1376 @*/
1377 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1378 {
1379   PetscErrorCode ierr;
1380   PetscMPIInt    size;
1381 
1382   PetscFunctionBegin;
1383   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1384   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1385   if (size == 1) {
1386     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1387   } else {
1388     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1389   }
1390   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1391   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1392   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1393   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 /*MC
1398   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1399     [I \otimes S + A \otimes T],
1400   where
1401     S is a dense (p \times q) matrix,
1402     T is a dense (p \times q) matrix,
1403     A is an AIJ  (n \times n) matrix,
1404     and I is the identity matrix.
1405   The resulting matrix is (np \times nq).
1406 
1407   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1408 
1409   Notes:
1410   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1411   where x and b are column vectors containing the row-major representations of X and B.
1412 
1413   Level: advanced
1414 
1415 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1416 M*/
1417 
1418 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1419 {
1420   PetscErrorCode ierr;
1421   Mat_MPIKAIJ    *b;
1422   PetscMPIInt    size;
1423 
1424   PetscFunctionBegin;
1425   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1426   A->data  = (void*)b;
1427 
1428   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1429 
1430   A->ops->setup = MatSetUp_KAIJ;
1431 
1432   b->w    = NULL;
1433   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1434   if (size == 1) {
1435     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1436     A->ops->setup               = MatSetUp_KAIJ;
1437     A->ops->destroy             = MatDestroy_SeqKAIJ;
1438     A->ops->view                = MatView_KAIJ;
1439     A->ops->mult                = MatMult_SeqKAIJ;
1440     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1441     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1442     A->ops->getrow              = MatGetRow_SeqKAIJ;
1443     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1444     A->ops->sor                 = MatSOR_SeqKAIJ;
1445   } else {
1446     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1447     A->ops->setup               = MatSetUp_KAIJ;
1448     A->ops->destroy             = MatDestroy_MPIKAIJ;
1449     A->ops->view                = MatView_KAIJ;
1450     A->ops->mult                = MatMult_MPIKAIJ;
1451     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1452     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1453     A->ops->getrow              = MatGetRow_MPIKAIJ;
1454     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1455     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1456   }
1457   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1458   PetscFunctionReturn(0);
1459 }
1460