xref: /petsc/src/mat/impls/kaij/kaij.c (revision bef158480efac06de457f7a665168877ab3c2fd7)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscErrorCode ierr;
145 
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154 
155   Not collective
156 
157   Input Parameter:
158 . A - the KAIJ matrix
159 
160   Output Parameter:
161 . S - location of pointer to array obtained with MatKAIJGetS()
162 
163   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If NULL is passed, it will not attempt to zero the array pointer.
165 
166   Level: advanced
167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(0);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
178 
179    Not Collective; the entire T is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the KAIJ matrix
183 
184    Output Parameter:
185 +  m - the number of rows in T
186 .  n - the number of columns in T
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
190 
191    Level: advanced
192 
193 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207 
208    Not Collective; the entire T is stored and returned independently on all processes
209 
210    Input Parameter:
211 .  A - the KAIJ matrix
212 
213    Output Parameter:
214 +  m - the number of rows in T
215 .  n - the number of columns in T
216 -  T - the T matrix, in form of a scalar array in column-major format
217 
218    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219 
220    Level: advanced
221 
222 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223 @*/
224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225 {
226   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227   PetscFunctionBegin;
228   if (m) *m = b->p;
229   if (n) *n = b->q;
230   if (T) *T = b->T;
231   PetscFunctionReturn(0);
232 }
233 
234 /*@C
235   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236 
237   Not collective
238 
239   Input Parameter:
240 . A - the KAIJ matrix
241 
242   Output Parameter:
243 . T - location of pointer to array obtained with MatKAIJGetS()
244 
245   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246   If NULL is passed, it will not attempt to zero the array pointer.
247 
248   Level: advanced
249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263 
264   Not collective
265 
266   Input Parameter:
267 . A - the KAIJ matrix
268 
269   Output Parameter:
270 . T - location of pointer to array obtained with MatKAIJGetS()
271 
272   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If NULL is passed, it will not attempt to zero the array pointer.
274 
275   Level: advanced
276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(0);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
287 
288    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
289 
290    Input Parameters:
291 +  A - the KAIJ matrix
292 -  B - the AIJ matrix
293 
294    Notes:
295    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
297 
298    Level: advanced
299 
300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303 {
304   PetscErrorCode ierr;
305   PetscMPIInt    size;
306 
307   PetscFunctionBegin;
308   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
309   if (size == 1) {
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311     a->AIJ = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314     a->A = B;
315   }
316   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the KAIJ matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332    The S matrix is copied, so the user can destroy this array.
333 
334    Level: Advanced
335 
336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337 @*/
338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339 {
340   PetscErrorCode ierr;
341   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = PetscFree(a->S);CHKERRQ(ierr);
345   if (S) {
346     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
347     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
348   } else  a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
357 
358    Logically Collective.
359 
360    Input Parameter:
361 .  A - the KAIJ matrix
362 
363   Output Parameter:
364 .  identity - the Boolean value
365 
366    Level: Advanced
367 
368 .seealso: MatKAIJGetS(), MatKAIJGetT()
369 @*/
370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371 {
372   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373   PetscInt    i,j;
374 
375   PetscFunctionBegin;
376   if (a->p != a->q) {
377     *identity = PETSC_FALSE;
378     PetscFunctionReturn(0);
379   } else *identity = PETSC_TRUE;
380   if (!a->isTI || a->S) {
381     for (i=0; i<a->p && *identity; i++) {
382       for (j=0; j<a->p && *identity; j++) {
383         if (i != j) {
384           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386         } else {
387           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389         }
390       }
391     }
392   }
393   PetscFunctionReturn(0);
394 }
395 
396 /*@C
397    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
398 
399    Logically Collective; the entire T is stored independently on all processes.
400 
401    Input Parameters:
402 +  A - the KAIJ matrix
403 .  p - the number of rows in S
404 .  q - the number of columns in S
405 -  T - the T matrix, in form of a scalar array in column-major format
406 
407    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408    The T matrix is copied, so the user can destroy this array.
409 
410    Level: Advanced
411 
412 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413 @*/
414 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415 {
416   PetscErrorCode ierr;
417   PetscInt       i,j;
418   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
419   PetscBool      isTI = PETSC_FALSE;
420 
421   PetscFunctionBegin;
422   /* check if T is an identity matrix */
423   if (T && (p == q)) {
424     isTI = PETSC_TRUE;
425     for (i=0; i<p; i++) {
426       for (j=0; j<q; j++) {
427         if (i == j) {
428           /* diagonal term must be 1 */
429           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430         } else {
431           /* off-diagonal term must be 0 */
432           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433         }
434       }
435     }
436   }
437   a->isTI = isTI;
438 
439   ierr = PetscFree(a->T);CHKERRQ(ierr);
440   if (T && (!isTI)) {
441     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
442     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
443   } else a->T = NULL;
444 
445   a->p = p;
446   a->q = q;
447   PetscFunctionReturn(0);
448 }
449 
450 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451 {
452   PetscErrorCode ierr;
453   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
454 
455   PetscFunctionBegin;
456   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
457   ierr = PetscFree(b->S);CHKERRQ(ierr);
458   ierr = PetscFree(b->T);CHKERRQ(ierr);
459   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
460   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
461   ierr = PetscFree(A->data);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 PetscErrorCode MatSetUp_KAIJ(Mat A)
466 {
467   PetscErrorCode ierr;
468   PetscInt       n;
469   PetscMPIInt    size;
470   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
471 
472   PetscFunctionBegin;
473   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
474   if (size == 1) {
475     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
476     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
477     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
478     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
479     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
480   } else {
481     Mat_MPIKAIJ *a;
482     Mat_MPIAIJ  *mpiaij;
483     IS          from,to;
484     Vec         gvec;
485     PetscScalar *T;
486     PetscInt    i,j;
487 
488     a = (Mat_MPIKAIJ*)A->data;
489     mpiaij = (Mat_MPIAIJ*)a->A->data;
490     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
491     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
492     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
493     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
494     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
495 
496     if (a->isTI) {
497       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
498        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
499        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
500        * to pass in. */
501       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
502       for (i=0; i<a->p; i++) {
503         for (j=0; j<a->q; j++) {
504           if (i==j) T[i+j*a->p] = 1.0;
505           else      T[i+j*a->p] = 0.0;
506         }
507       }
508     } else T = a->T;
509     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
510     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
511     if (a->isTI) {
512       ierr = PetscFree(T);CHKERRQ(ierr);
513     }
514 
515     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
516     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
517     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
518     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
519     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
520 
521     /* create two temporary Index sets for build scatter gather */
522     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
523     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
524 
525     /* create temporary global vector to generate scatter context */
526     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
527 
528     /* generate the scatter context */
529     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
530 
531     ierr = ISDestroy(&from);CHKERRQ(ierr);
532     ierr = ISDestroy(&to);CHKERRQ(ierr);
533     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
534   }
535 
536   A->assembled = PETSC_TRUE;
537   PetscFunctionReturn(0);
538 }
539 
540 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
541 {
542   PetscViewerFormat format;
543   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
544   Mat               B;
545   PetscInt          i;
546   PetscErrorCode    ierr;
547   PetscBool         ismpikaij;
548 
549   PetscFunctionBegin;
550   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
551   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
552   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
553     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
554 
555     /* Print appropriate details for S. */
556     if (!a->S) {
557       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
558     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
559       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
560       for (i=0; i<(a->p * a->q); i++) {
561 #if defined(PETSC_USE_COMPLEX)
562         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
563 #else
564         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
565 #endif
566       }
567       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
568     }
569 
570     /* Print appropriate details for T. */
571     if (a->isTI) {
572       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
573     } else if (!a->T) {
574       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
575     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
576       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
577       for (i=0; i<(a->p * a->q); i++) {
578 #if defined(PETSC_USE_COMPLEX)
579         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
580 #else
581         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
582 #endif
583       }
584       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
585     }
586 
587     /* Now print details for the AIJ matrix, using the AIJ viewer. */
588     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
589     if (ismpikaij) {
590       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
591       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
592     } else {
593       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
594     }
595 
596   } else {
597     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
598     if (ismpikaij) {
599       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
600     } else {
601       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
602     }
603     ierr = MatView(B,viewer);CHKERRQ(ierr);
604     ierr = MatDestroy(&B);CHKERRQ(ierr);
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
610 {
611   PetscErrorCode ierr;
612   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
613 
614   PetscFunctionBegin;
615   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
616   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
617   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
618   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
619   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
620   ierr = PetscFree(b->S);CHKERRQ(ierr);
621   ierr = PetscFree(b->T);CHKERRQ(ierr);
622   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
623   ierr = PetscFree(A->data);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 /* --------------------------------------------------------------------------------------*/
628 
629 /* zz = yy + Axx */
630 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631 {
632   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
633   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
634   const PetscScalar *s = b->S, *t = b->T;
635   const PetscScalar *x,*v,*bx;
636   PetscScalar       *y,*sums;
637   PetscErrorCode    ierr;
638   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
639   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
640 
641   PetscFunctionBegin;
642   if (!yy) {
643     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
644   } else {
645     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
646   }
647   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
648 
649   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
650   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
651   idx  = a->j;
652   v    = a->a;
653   ii   = a->i;
654 
655   if (b->isTI) {
656     for (i=0; i<m; i++) {
657       jrow = ii[i];
658       n    = ii[i+1] - jrow;
659       sums = y + p*i;
660       for (j=0; j<n; j++) {
661         for (k=0; k<p; k++) {
662           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
663         }
664       }
665     }
666     ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr);
667   } else if (t) {
668     for (i=0; i<m; i++) {
669       jrow = ii[i];
670       n    = ii[i+1] - jrow;
671       sums = y + p*i;
672       for (j=0; j<n; j++) {
673         for (k=0; k<p; k++) {
674           for (l=0; l<q; l++) {
675             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
676           }
677         }
678       }
679     }
680     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
681      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
682      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
683      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
684     ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr);
685   }
686   if (s) {
687     for (i=0; i<m; i++) {
688       sums = y + p*i;
689       bx   = x + q*i;
690       if (i < b->AIJ->cmap->n) {
691         for (j=0; j<q; j++) {
692           for (k=0; k<p; k++) {
693             sums[k] += s[k+j*p]*bx[j];
694           }
695         }
696       }
697     }
698     ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr);
699   }
700 
701   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
702   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
707 {
708   PetscErrorCode ierr;
709   PetscFunctionBegin;
710   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #include <petsc/private/kernels/blockinvert.h>
715 
716 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
717 {
718   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
719   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
720   const PetscScalar *S  = b->S;
721   const PetscScalar *T  = b->T;
722   const PetscScalar *v  = a->a;
723   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
724   PetscErrorCode    ierr;
725   PetscInt          i,j,*v_pivots,dof,dof2;
726   PetscScalar       *diag,aval,*v_work;
727 
728   PetscFunctionBegin;
729   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
730   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
731 
732   dof  = p;
733   dof2 = dof*dof;
734 
735   if (b->ibdiagvalid) {
736     if (values) *values = b->ibdiag;
737     PetscFunctionReturn(0);
738   }
739   if (!b->ibdiag) {
740     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
741     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
742   }
743   if (values) *values = b->ibdiag;
744   diag = b->ibdiag;
745 
746   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
747   for (i=0; i<m; i++) {
748     if (S) {
749       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
750     } else {
751       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
752     }
753     if (b->isTI) {
754       aval = 0;
755       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
756       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
757     } else if (T) {
758       aval = 0;
759       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
760       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
761     }
762     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
763     diag += dof2;
764   }
765   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
766 
767   b->ibdiagvalid = PETSC_TRUE;
768   PetscFunctionReturn(0);
769 }
770 
771 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
772 {
773   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
774 
775   PetscFunctionBegin;
776   *B = kaij->AIJ;
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
781 {
782   PetscErrorCode    ierr;
783   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
784   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
785   const PetscScalar *aa = a->a, *T = kaij->T, *v;
786   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
787   const PetscScalar *b, *xb, *idiag;
788   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
789   PetscInt          i, j, k, i2, bs, bs2, nz;
790 
791   PetscFunctionBegin;
792   its = its*lits;
793   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
794   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
795   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
796   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
797   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
798   else        {bs = p; bs2 = bs*bs; }
799 
800   if (!m) PetscFunctionReturn(0);
801 
802   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
803   idiag = kaij->ibdiag;
804   diag  = a->diag;
805 
806   if (!kaij->sor.setup) {
807     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
808     kaij->sor.setup = PETSC_TRUE;
809   }
810   y     = kaij->sor.y;
811   w     = kaij->sor.w;
812   work  = kaij->sor.work;
813   t     = kaij->sor.t;
814   arr   = kaij->sor.arr;
815 
816   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
817   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
818 
819   if (flag & SOR_ZERO_INITIAL_GUESS) {
820     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
821       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
822       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
823       i2     =  bs;
824       idiag  += bs2;
825       for (i=1; i<m; i++) {
826         v  = aa + ai[i];
827         vi = aj + ai[i];
828         nz = diag[i] - ai[i];
829 
830         if (T) {                /* b - T (Arow * x) */
831           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
832           for (j=0; j<nz; j++) {
833             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
834           }
835           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
836           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
837         } else if (kaij->isTI) {
838           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
839           for (j=0; j<nz; j++) {
840             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
841           }
842         } else {
843           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
844         }
845 
846         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
847         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
848 
849         idiag += bs2;
850         i2    += bs;
851       }
852       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
853       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
854       xb = t;
855     } else xb = b;
856     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
857       idiag = kaij->ibdiag+bs2*(m-1);
858       i2    = bs * (m-1);
859       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
860       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
861       i2    -= bs;
862       idiag -= bs2;
863       for (i=m-2; i>=0; i--) {
864         v  = aa + diag[i] + 1 ;
865         vi = aj + diag[i] + 1;
866         nz = ai[i+1] - diag[i] - 1;
867 
868         if (T) {                /* FIXME: This branch untested */
869           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
870           /* copy all rows of x that are needed into contiguous space */
871           workt = work;
872           for (j=0; j<nz; j++) {
873             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
874             workt += bs;
875           }
876           arrt = arr;
877           for (j=0; j<nz; j++) {
878             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
879             for (k=0; k<bs2; k++) arrt[k] *= v[j];
880             arrt += bs2;
881           }
882           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
883         } else if (kaij->isTI) {
884           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           for (j=0; j<nz; j++) {
886             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
887           }
888         }
889 
890         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
891         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
892 
893         idiag -= bs2;
894         i2    -= bs;
895       }
896       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
897     }
898     its--;
899   }
900   while (its--) {               /* FIXME: This branch not updated */
901     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
902       i2     =  0;
903       idiag  = kaij->ibdiag;
904       for (i=0; i<m; i++) {
905         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
906 
907         v  = aa + ai[i];
908         vi = aj + ai[i];
909         nz = diag[i] - ai[i];
910         workt = work;
911         for (j=0; j<nz; j++) {
912           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
913           workt += bs;
914         }
915         arrt = arr;
916         if (T) {
917           for (j=0; j<nz; j++) {
918             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
919             for (k=0; k<bs2; k++) arrt[k] *= v[j];
920             arrt += bs2;
921           }
922           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
923         } else if (kaij->isTI) {
924           for (j=0; j<nz; j++) {
925             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
926             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
927             arrt += bs2;
928           }
929           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
930         }
931         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
932 
933         v  = aa + diag[i] + 1;
934         vi = aj + diag[i] + 1;
935         nz = ai[i+1] - diag[i] - 1;
936         workt = work;
937         for (j=0; j<nz; j++) {
938           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
939           workt += bs;
940         }
941         arrt = arr;
942         if (T) {
943           for (j=0; j<nz; j++) {
944             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
945             for (k=0; k<bs2; k++) arrt[k] *= v[j];
946             arrt += bs2;
947           }
948           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
949         } else if (kaij->isTI) {
950           for (j=0; j<nz; j++) {
951             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
952             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
953             arrt += bs2;
954           }
955           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
956         }
957 
958         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
959         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
960 
961         idiag += bs2;
962         i2    += bs;
963       }
964       xb = t;
965     }
966     else xb = b;
967     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
968       idiag = kaij->ibdiag+bs2*(m-1);
969       i2    = bs * (m-1);
970       if (xb == b) {
971         for (i=m-1; i>=0; i--) {
972           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
973 
974           v  = aa + ai[i];
975           vi = aj + ai[i];
976           nz = diag[i] - ai[i];
977           workt = work;
978           for (j=0; j<nz; j++) {
979             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
980             workt += bs;
981           }
982           arrt = arr;
983           if (T) {
984             for (j=0; j<nz; j++) {
985               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
986               for (k=0; k<bs2; k++) arrt[k] *= v[j];
987               arrt += bs2;
988             }
989             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
990           } else if (kaij->isTI) {
991             for (j=0; j<nz; j++) {
992               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
993               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
994               arrt += bs2;
995             }
996             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
997           }
998 
999           v  = aa + diag[i] + 1;
1000           vi = aj + diag[i] + 1;
1001           nz = ai[i+1] - diag[i] - 1;
1002           workt = work;
1003           for (j=0; j<nz; j++) {
1004             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1005             workt += bs;
1006           }
1007           arrt = arr;
1008           if (T) {
1009             for (j=0; j<nz; j++) {
1010               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1011               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1012               arrt += bs2;
1013             }
1014             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1015           } else if (kaij->isTI) {
1016             for (j=0; j<nz; j++) {
1017               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1018               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1019               arrt += bs2;
1020             }
1021             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1022           }
1023 
1024           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1025           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1026         }
1027       } else {
1028         for (i=m-1; i>=0; i--) {
1029           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1030           v  = aa + diag[i] + 1;
1031           vi = aj + diag[i] + 1;
1032           nz = ai[i+1] - diag[i] - 1;
1033           workt = work;
1034           for (j=0; j<nz; j++) {
1035             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1036             workt += bs;
1037           }
1038           arrt = arr;
1039           if (T) {
1040             for (j=0; j<nz; j++) {
1041               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1042               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1043               arrt += bs2;
1044             }
1045             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1046           } else if (kaij->isTI) {
1047             for (j=0; j<nz; j++) {
1048               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1049               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1050               arrt += bs2;
1051             }
1052             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1053           }
1054           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1055           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1056         }
1057       }
1058       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1059     }
1060   }
1061 
1062   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1063   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*===================================================================================*/
1068 
1069 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1070 {
1071   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1072   PetscErrorCode ierr;
1073 
1074   PetscFunctionBegin;
1075   if (!yy) {
1076     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1077   } else {
1078     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1079   }
1080   /* start the scatter */
1081   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1082   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1083   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1084   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1089 {
1090   PetscErrorCode ierr;
1091   PetscFunctionBegin;
1092   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1097 {
1098   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1099   PetscErrorCode  ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 /* ----------------------------------------------------------------*/
1107 
1108 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1109 {
1110   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1111   PetscErrorCode  diag = PETSC_FALSE;
1112   PetscErrorCode  ierr;
1113   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1114   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1115 
1116   PetscFunctionBegin;
1117   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1118   b->getrowactive = PETSC_TRUE;
1119   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1120 
1121   if ((!S) && (!T) && (!b->isTI)) {
1122     if (ncols)    *ncols  = 0;
1123     if (cols)     *cols   = NULL;
1124     if (values)   *values = NULL;
1125     PetscFunctionReturn(0);
1126   }
1127 
1128   if (T || b->isTI) {
1129     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1130     c     = nzaij;
1131     for (i=0; i<nzaij; i++) {
1132       /* check if this row contains a diagonal entry */
1133       if (colsaij[i] == r) {
1134         diag = PETSC_TRUE;
1135         c = i;
1136       }
1137     }
1138   } else nzaij = c = 0;
1139 
1140   /* calculate size of row */
1141   nz = 0;
1142   if (S)            nz += q;
1143   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1144 
1145   if (cols || values) {
1146     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1147     for (i=0; i<q; i++) {
1148       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1149       v[i] = 0.0;
1150     }
1151     if (b->isTI) {
1152       for (i=0; i<nzaij; i++) {
1153         for (j=0; j<q; j++) {
1154           idx[i*q+j] = colsaij[i]*q+j;
1155           v[i*q+j]   = (j==s ? vaij[i] : 0);
1156         }
1157       }
1158     } else if (T) {
1159       for (i=0; i<nzaij; i++) {
1160         for (j=0; j<q; j++) {
1161           idx[i*q+j] = colsaij[i]*q+j;
1162           v[i*q+j]   = vaij[i]*T[s+j*p];
1163         }
1164       }
1165     }
1166     if (S) {
1167       for (j=0; j<q; j++) {
1168         idx[c*q+j] = r*q+j;
1169         v[c*q+j]  += S[s+j*p];
1170       }
1171     }
1172   }
1173 
1174   if (ncols)    *ncols  = nz;
1175   if (cols)     *cols   = idx;
1176   if (values)   *values = v;
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1181 {
1182   PetscErrorCode ierr;
1183   PetscFunctionBegin;
1184   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1185   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1190 {
1191   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1192   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1193   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1194   Mat             AIJ     = b->A;
1195   PetscBool       diag    = PETSC_FALSE;
1196   PetscErrorCode  ierr;
1197   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1198   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1199   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1200 
1201   PetscFunctionBegin;
1202   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1203   b->getrowactive = PETSC_TRUE;
1204   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1205   lrow = row - rstart;
1206 
1207   if ((!S) && (!T) && (!b->isTI)) {
1208     if (ncols)    *ncols  = 0;
1209     if (cols)     *cols   = NULL;
1210     if (values)   *values = NULL;
1211     PetscFunctionReturn(0);
1212   }
1213 
1214   r = lrow/p;
1215   s = lrow%p;
1216 
1217   if (T || b->isTI) {
1218     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
1219     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1220     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1221 
1222     c     = ncolsaij + ncolsoaij;
1223     for (i=0; i<ncolsaij; i++) {
1224       /* check if this row contains a diagonal entry */
1225       if (colsaij[i] == r) {
1226         diag = PETSC_TRUE;
1227         c = i;
1228       }
1229     }
1230   } else c = 0;
1231 
1232   /* calculate size of row */
1233   nz = 0;
1234   if (S)            nz += q;
1235   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1236 
1237   if (cols || values) {
1238     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1239     for (i=0; i<q; i++) {
1240       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1241       v[i] = 0.0;
1242     }
1243     if (b->isTI) {
1244       for (i=0; i<ncolsaij; i++) {
1245         for (j=0; j<q; j++) {
1246           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1247           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1248         }
1249       }
1250       for (i=0; i<ncolsoaij; i++) {
1251         for (j=0; j<q; j++) {
1252           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1253           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1254         }
1255       }
1256     } else if (T) {
1257       for (i=0; i<ncolsaij; i++) {
1258         for (j=0; j<q; j++) {
1259           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1260           v[i*q+j]   = vals[i]*T[s+j*p];
1261         }
1262       }
1263       for (i=0; i<ncolsoaij; i++) {
1264         for (j=0; j<q; j++) {
1265           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1266           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1267         }
1268       }
1269     }
1270     if (S) {
1271       for (j=0; j<q; j++) {
1272         idx[c*q+j] = (r+rstart/p)*q+j;
1273         v[c*q+j]  += S[s+j*p];
1274       }
1275     }
1276   }
1277 
1278   if (ncols)  *ncols  = nz;
1279   if (cols)   *cols   = idx;
1280   if (values) *values = v;
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1285 {
1286   PetscErrorCode ierr;
1287   PetscFunctionBegin;
1288   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1289   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1294 {
1295   PetscErrorCode ierr;
1296   Mat            A;
1297 
1298   PetscFunctionBegin;
1299   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1300   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1301   ierr = MatDestroy(&A);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 /* ---------------------------------------------------------------------------------- */
1306 /*@C
1307   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1308 
1309     [I \otimes S + A \otimes T]
1310 
1311   where
1312     S is a dense (p \times q) matrix
1313     T is a dense (p \times q) matrix
1314     A is an AIJ  (n \times n) matrix
1315     I is the identity matrix
1316   The resulting matrix is (np \times nq)
1317 
1318   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1319 
1320   Collective
1321 
1322   Input Parameters:
1323 + A - the AIJ matrix
1324 . p - number of rows in S and T
1325 . q - number of columns in S and T
1326 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1327 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1328 
1329   Output Parameter:
1330 . kaij - the new KAIJ matrix
1331 
1332   Notes:
1333   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1334   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1335 
1336   Level: advanced
1337 
1338 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1339 @*/
1340 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1341 {
1342   PetscErrorCode ierr;
1343   PetscMPIInt    size;
1344 
1345   PetscFunctionBegin;
1346   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1347   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1348   if (size == 1) {
1349     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1350   } else {
1351     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1352   }
1353   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1354   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1355   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1356   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 /*MC
1361   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1362     [I \otimes S + A \otimes T],
1363   where
1364     S is a dense (p \times q) matrix,
1365     T is a dense (p \times q) matrix,
1366     A is an AIJ  (n \times n) matrix,
1367     and I is the identity matrix.
1368   The resulting matrix is (np \times nq).
1369 
1370   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1371 
1372   Notes:
1373   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1374   where x and b are column vectors containing the row-major representations of X and B.
1375 
1376   Level: advanced
1377 
1378 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1379 M*/
1380 
1381 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1382 {
1383   PetscErrorCode ierr;
1384   Mat_MPIKAIJ    *b;
1385   PetscMPIInt    size;
1386 
1387   PetscFunctionBegin;
1388   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1389   A->data  = (void*)b;
1390 
1391   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1392 
1393   A->ops->setup = MatSetUp_KAIJ;
1394 
1395   b->w    = NULL;
1396   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1397   if (size == 1) {
1398     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1399     A->ops->setup               = MatSetUp_KAIJ;
1400     A->ops->destroy             = MatDestroy_SeqKAIJ;
1401     A->ops->view                = MatView_KAIJ;
1402     A->ops->mult                = MatMult_SeqKAIJ;
1403     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1404     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1405     A->ops->getrow              = MatGetRow_SeqKAIJ;
1406     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1407     A->ops->sor                 = MatSOR_SeqKAIJ;
1408   } else {
1409     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1410     A->ops->setup               = MatSetUp_KAIJ;
1411     A->ops->destroy             = MatDestroy_MPIKAIJ;
1412     A->ops->view                = MatView_KAIJ;
1413     A->ops->mult                = MatMult_MPIKAIJ;
1414     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1415     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1416     A->ops->getrow              = MatGetRow_MPIKAIJ;
1417     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1418     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1419   }
1420   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1421   PetscFunctionReturn(0);
1422 }
1423