xref: /petsc/src/mat/impls/kaij/kaij.c (revision 030f984af8d8bb4c203755d35bded3c05b3d83ce)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscErrorCode ierr;
145 
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154 
155   Not collective
156 
157   Input Parameter:
158 . A - the KAIJ matrix
159 
160   Output Parameter:
161 . S - location of pointer to array obtained with MatKAIJGetS()
162 
163   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If NULL is passed, it will not attempt to zero the array pointer.
165 
166   Level: advanced
167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(0);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
178 
179    Not Collective; the entire T is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the KAIJ matrix
183 
184    Output Parameter:
185 +  m - the number of rows in T
186 .  n - the number of columns in T
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
190 
191    Level: advanced
192 
193 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207 
208    Not Collective; the entire T is stored and returned independently on all processes
209 
210    Input Parameter:
211 .  A - the KAIJ matrix
212 
213    Output Parameter:
214 +  m - the number of rows in T
215 .  n - the number of columns in T
216 -  T - the T matrix, in form of a scalar array in column-major format
217 
218    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219 
220    Level: advanced
221 
222 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223 @*/
224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225 {
226   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227   PetscFunctionBegin;
228   if (m) *m = b->p;
229   if (n) *n = b->q;
230   if (T) *T = b->T;
231   PetscFunctionReturn(0);
232 }
233 
234 /*@C
235   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236 
237   Not collective
238 
239   Input Parameter:
240 . A - the KAIJ matrix
241 
242   Output Parameter:
243 . T - location of pointer to array obtained with MatKAIJGetS()
244 
245   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246   If NULL is passed, it will not attempt to zero the array pointer.
247 
248   Level: advanced
249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263 
264   Not collective
265 
266   Input Parameter:
267 . A - the KAIJ matrix
268 
269   Output Parameter:
270 . T - location of pointer to array obtained with MatKAIJGetS()
271 
272   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If NULL is passed, it will not attempt to zero the array pointer.
274 
275   Level: advanced
276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(0);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
287 
288    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
289 
290    Input Parameters:
291 +  A - the KAIJ matrix
292 -  B - the AIJ matrix
293 
294    Notes:
295    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
297 
298    Level: advanced
299 
300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303 {
304   PetscErrorCode ierr;
305   PetscMPIInt    size;
306 
307   PetscFunctionBegin;
308   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
309   if (size == 1) {
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311     a->AIJ = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314     a->A = B;
315   }
316   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the KAIJ matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332    The S matrix is copied, so the user can destroy this array.
333 
334    Level: Advanced
335 
336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337 @*/
338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339 {
340   PetscErrorCode ierr;
341   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = PetscFree(a->S);CHKERRQ(ierr);
345   if (S) {
346     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
347     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
348   } else  a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
357 
358    Logically Collective.
359 
360    Input Parameter:
361 .  A - the KAIJ matrix
362 
363   Output Parameter:
364 .  identity - the Boolean value
365 
366    Level: Advanced
367 
368 .seealso: MatKAIJGetS(), MatKAIJGetT()
369 @*/
370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371 {
372   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373   PetscInt    i,j;
374 
375   PetscFunctionBegin;
376   if (a->p != a->q) {
377     *identity = PETSC_FALSE;
378     PetscFunctionReturn(0);
379   } else *identity = PETSC_TRUE;
380   if (!a->isTI || a->S) {
381     for (i=0; i<a->p && *identity; i++) {
382       for (j=0; j<a->p && *identity; j++) {
383         if (i != j) {
384           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386         } else {
387           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389         }
390       }
391     }
392   }
393   PetscFunctionReturn(0);
394 }
395 
396 /*@C
397    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
398 
399    Logically Collective; the entire T is stored independently on all processes.
400 
401    Input Parameters:
402 +  A - the KAIJ matrix
403 .  p - the number of rows in S
404 .  q - the number of columns in S
405 -  T - the T matrix, in form of a scalar array in column-major format
406 
407    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408    The T matrix is copied, so the user can destroy this array.
409 
410    Level: Advanced
411 
412 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413 @*/
414 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415 {
416   PetscErrorCode ierr;
417   PetscInt       i,j;
418   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
419   PetscBool      isTI = PETSC_FALSE;
420 
421   PetscFunctionBegin;
422   /* check if T is an identity matrix */
423   if (T && (p == q)) {
424     isTI = PETSC_TRUE;
425     for (i=0; i<p; i++) {
426       for (j=0; j<q; j++) {
427         if (i == j) {
428           /* diagonal term must be 1 */
429           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430         } else {
431           /* off-diagonal term must be 0 */
432           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433         }
434       }
435     }
436   }
437   a->isTI = isTI;
438 
439   ierr = PetscFree(a->T);CHKERRQ(ierr);
440   if (T && (!isTI)) {
441     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
442     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
443   } else a->T = NULL;
444 
445   a->p = p;
446   a->q = q;
447   PetscFunctionReturn(0);
448 }
449 
450 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451 {
452   PetscErrorCode ierr;
453   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
454 
455   PetscFunctionBegin;
456   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
457   ierr = PetscFree(b->S);CHKERRQ(ierr);
458   ierr = PetscFree(b->T);CHKERRQ(ierr);
459   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
460   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
461   ierr = PetscFree(A->data);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 PetscErrorCode MatSetUp_KAIJ(Mat A)
466 {
467   PetscErrorCode ierr;
468   PetscInt       n;
469   PetscMPIInt    size;
470   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
471 
472   PetscFunctionBegin;
473   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
474   if (size == 1) {
475     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
476     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
477     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
478     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
479     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
480   } else {
481     Mat_MPIKAIJ *a;
482     Mat_MPIAIJ  *mpiaij;
483     IS          from,to;
484     Vec         gvec;
485     PetscScalar *T;
486     PetscInt    i,j;
487 
488     a = (Mat_MPIKAIJ*)A->data;
489     mpiaij = (Mat_MPIAIJ*)a->A->data;
490     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
491     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
492     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
493     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
494     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
495 
496     if (a->isTI) {
497       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
498        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
499        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
500        * to pass in. */
501       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
502       for (i=0; i<a->p; i++) {
503         for (j=0; j<a->q; j++) {
504           if (i==j) T[i+j*a->p] = 1.0;
505           else      T[i+j*a->p] = 0.0;
506         }
507       }
508     } else T = a->T;
509     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
510     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
511     if (a->isTI) {
512       ierr = PetscFree(T);CHKERRQ(ierr);
513     }
514 
515     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
516     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
517     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
518     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
519     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
520 
521     /* create two temporary Index sets for build scatter gather */
522     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
523     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
524 
525     /* create temporary global vector to generate scatter context */
526     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
527 
528     /* generate the scatter context */
529     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
530 
531     ierr = ISDestroy(&from);CHKERRQ(ierr);
532     ierr = ISDestroy(&to);CHKERRQ(ierr);
533     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
534   }
535 
536   A->assembled = PETSC_TRUE;
537   PetscFunctionReturn(0);
538 }
539 
540 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
541 {
542   PetscViewerFormat format;
543   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
544   Mat               B;
545   PetscInt          i;
546   PetscErrorCode    ierr;
547   PetscBool         ismpikaij;
548 
549   PetscFunctionBegin;
550   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
551   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
552   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
553     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
554 
555     /* Print appropriate details for S. */
556     if (!a->S) {
557       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
558     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
559       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
560       for (i=0; i<(a->p * a->q); i++) {
561 #if defined(PETSC_USE_COMPLEX)
562         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
563 #else
564         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
565 #endif
566       }
567       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
568     }
569 
570     /* Print appropriate details for T. */
571     if (a->isTI) {
572       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
573     } else if (!a->T) {
574       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
575     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
576       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
577       for (i=0; i<(a->p * a->q); i++) {
578 #if defined(PETSC_USE_COMPLEX)
579         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
580 #else
581         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
582 #endif
583       }
584       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
585     }
586 
587     /* Now print details for the AIJ matrix, using the AIJ viewer. */
588     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
589     if (ismpikaij) {
590       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
591       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
592     } else {
593       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
594     }
595 
596   } else {
597     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
598     if (ismpikaij) {
599       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
600     } else {
601       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
602     }
603     ierr = MatView(B,viewer);CHKERRQ(ierr);
604     ierr = MatDestroy(&B);CHKERRQ(ierr);
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
610 {
611   PetscErrorCode ierr;
612   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
613 
614   PetscFunctionBegin;
615   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
616   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
617   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
618   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
619   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
620   ierr = PetscFree(b->S);CHKERRQ(ierr);
621   ierr = PetscFree(b->T);CHKERRQ(ierr);
622   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
623   ierr = PetscFree(A->data);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 /* --------------------------------------------------------------------------------------*/
628 
629 /* zz = yy + Axx */
630 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631 {
632   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
633   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
634   const PetscScalar *s = b->S, *t = b->T;
635   const PetscScalar *x,*v,*bx;
636   PetscScalar       *y,*sums;
637   PetscErrorCode    ierr;
638   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
639   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
640 
641   PetscFunctionBegin;
642   if (!yy) {
643     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
644   } else {
645     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
646   }
647   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
648 
649   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
650   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
651   idx  = a->j;
652   v    = a->a;
653   ii   = a->i;
654 
655   if (b->isTI) {
656     for (i=0; i<m; i++) {
657       jrow = ii[i];
658       n    = ii[i+1] - jrow;
659       sums = y + p*i;
660       for (j=0; j<n; j++) {
661         for (k=0; k<p; k++) {
662           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
663         }
664       }
665     }
666     ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr);
667   } else if (t) {
668     for (i=0; i<m; i++) {
669       jrow = ii[i];
670       n    = ii[i+1] - jrow;
671       sums = y + p*i;
672       for (j=0; j<n; j++) {
673         for (k=0; k<p; k++) {
674           for (l=0; l<q; l++) {
675             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
676           }
677         }
678       }
679     }
680     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
681      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
682      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
683      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
684     ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr);
685   }
686   if (s) {
687     for (i=0; i<m; i++) {
688       sums = y + p*i;
689       bx   = x + q*i;
690       if (i < b->AIJ->cmap->n) {
691         for (j=0; j<q; j++) {
692           for (k=0; k<p; k++) {
693             sums[k] += s[k+j*p]*bx[j];
694           }
695         }
696       }
697     }
698     ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr);
699   }
700 
701   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
702   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
707 {
708   PetscErrorCode ierr;
709   PetscFunctionBegin;
710   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #include <petsc/private/kernels/blockinvert.h>
715 
716 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
717 {
718   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
719   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
720   const PetscScalar *S  = b->S;
721   const PetscScalar *T  = b->T;
722   const PetscScalar *v  = a->a;
723   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
724   PetscErrorCode    ierr;
725   PetscInt          i,j,*v_pivots,dof,dof2;
726   PetscScalar       *diag,aval,*v_work;
727 
728   PetscFunctionBegin;
729   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
730   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
731 
732   dof  = p;
733   dof2 = dof*dof;
734 
735   if (b->ibdiagvalid) {
736     if (values) *values = b->ibdiag;
737     PetscFunctionReturn(0);
738   }
739   if (!b->ibdiag) {
740     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
741     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
742   }
743   if (values) *values = b->ibdiag;
744   diag = b->ibdiag;
745 
746   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
747   for (i=0; i<m; i++) {
748     if (S) {
749       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
750     } else {
751       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
752     }
753     if (b->isTI) {
754       aval = 0;
755       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
756       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
757     } else if (T) {
758       aval = 0;
759       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
760       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
761     }
762     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
763     diag += dof2;
764   }
765   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
766 
767   b->ibdiagvalid = PETSC_TRUE;
768   PetscFunctionReturn(0);
769 }
770 
771 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
772 {
773   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
774 
775   PetscFunctionBegin;
776   *B = kaij->AIJ;
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
781 {
782   PetscErrorCode    ierr;
783   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
784   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
785   const PetscScalar *aa = a->a, *T = kaij->T, *v;
786   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
787   const PetscScalar *b, *xb, *idiag;
788   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
789   PetscInt          i, j, k, i2, bs, bs2, nz;
790 
791   PetscFunctionBegin;
792   its = its*lits;
793   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
794   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
795   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
796   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
797   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
798   else        {bs = p; bs2 = bs*bs; }
799 
800   if (!m) PetscFunctionReturn(0);
801 
802   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
803   idiag = kaij->ibdiag;
804   diag  = a->diag;
805 
806   if (!kaij->sor.setup) {
807     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
808     kaij->sor.setup = PETSC_TRUE;
809   }
810   y     = kaij->sor.y;
811   w     = kaij->sor.w;
812   work  = kaij->sor.work;
813   t     = kaij->sor.t;
814   arr   = kaij->sor.arr;
815 
816   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
817   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
818 
819   if (flag & SOR_ZERO_INITIAL_GUESS) {
820     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
821       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
822       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
823       i2     =  bs;
824       idiag  += bs2;
825       for (i=1; i<m; i++) {
826         v  = aa + ai[i];
827         vi = aj + ai[i];
828         nz = diag[i] - ai[i];
829 
830         if (T) {                /* b - T (Arow * x) */
831           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
832           for (j=0; j<nz; j++) {
833             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
834           }
835           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
836           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
837         } else if (kaij->isTI) {
838           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
839           for (j=0; j<nz; j++) {
840             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
841           }
842         } else {
843           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
844         }
845 
846         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
847         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
848 
849         idiag += bs2;
850         i2    += bs;
851       }
852       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
853       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
854       xb = t;
855     } else xb = b;
856     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
857       idiag = kaij->ibdiag+bs2*(m-1);
858       i2    = bs * (m-1);
859       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
860       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
861       i2    -= bs;
862       idiag -= bs2;
863       for (i=m-2; i>=0; i--) {
864         v  = aa + diag[i] + 1 ;
865         vi = aj + diag[i] + 1;
866         nz = ai[i+1] - diag[i] - 1;
867 
868         if (T) {                /* FIXME: This branch untested */
869           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
870           /* copy all rows of x that are needed into contiguous space */
871           workt = work;
872           for (j=0; j<nz; j++) {
873             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
874             workt += bs;
875           }
876           arrt = arr;
877           for (j=0; j<nz; j++) {
878             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
879             for (k=0; k<bs2; k++) arrt[k] *= v[j];
880             arrt += bs2;
881           }
882           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
883         } else if (kaij->isTI) {
884           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           for (j=0; j<nz; j++) {
886             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
887           }
888         }
889 
890         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
891         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
892 
893         idiag -= bs2;
894         i2    -= bs;
895       }
896       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
897     }
898     its--;
899   }
900   while (its--) {               /* FIXME: This branch not updated */
901     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
902       i2     =  0;
903       idiag  = kaij->ibdiag;
904       for (i=0; i<m; i++) {
905         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
906 
907         v  = aa + ai[i];
908         vi = aj + ai[i];
909         nz = diag[i] - ai[i];
910         workt = work;
911         for (j=0; j<nz; j++) {
912           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
913           workt += bs;
914         }
915         arrt = arr;
916         if (T) {
917           for (j=0; j<nz; j++) {
918             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
919             for (k=0; k<bs2; k++) arrt[k] *= v[j];
920             arrt += bs2;
921           }
922           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
923         } else if (kaij->isTI) {
924           for (j=0; j<nz; j++) {
925             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
926             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
927             arrt += bs2;
928           }
929           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
930         }
931         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
932 
933         v  = aa + diag[i] + 1;
934         vi = aj + diag[i] + 1;
935         nz = ai[i+1] - diag[i] - 1;
936         workt = work;
937         for (j=0; j<nz; j++) {
938           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
939           workt += bs;
940         }
941         arrt = arr;
942         if (T) {
943           for (j=0; j<nz; j++) {
944             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
945             for (k=0; k<bs2; k++) arrt[k] *= v[j];
946             arrt += bs2;
947           }
948           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
949         } else if (kaij->isTI) {
950           for (j=0; j<nz; j++) {
951             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
952             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
953             arrt += bs2;
954           }
955           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
956         }
957 
958         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
959         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
960 
961         idiag += bs2;
962         i2    += bs;
963       }
964       xb = t;
965     }
966     else xb = b;
967     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
968       idiag = kaij->ibdiag+bs2*(m-1);
969       i2    = bs * (m-1);
970       if (xb == b) {
971         for (i=m-1; i>=0; i--) {
972           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
973 
974           v  = aa + ai[i];
975           vi = aj + ai[i];
976           nz = diag[i] - ai[i];
977           workt = work;
978           for (j=0; j<nz; j++) {
979             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
980             workt += bs;
981           }
982           arrt = arr;
983           if (T) {
984             for (j=0; j<nz; j++) {
985               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
986               for (k=0; k<bs2; k++) arrt[k] *= v[j];
987               arrt += bs2;
988             }
989             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
990           } else if (kaij->isTI) {
991             for (j=0; j<nz; j++) {
992               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
993               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
994               arrt += bs2;
995             }
996             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
997           }
998 
999           v  = aa + diag[i] + 1;
1000           vi = aj + diag[i] + 1;
1001           nz = ai[i+1] - diag[i] - 1;
1002           workt = work;
1003           for (j=0; j<nz; j++) {
1004             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1005             workt += bs;
1006           }
1007           arrt = arr;
1008           if (T) {
1009             for (j=0; j<nz; j++) {
1010               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1011               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1012               arrt += bs2;
1013             }
1014             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1015           } else if (kaij->isTI) {
1016             for (j=0; j<nz; j++) {
1017               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1018               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1019               arrt += bs2;
1020             }
1021             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1022           }
1023 
1024           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1025           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1026         }
1027       } else {
1028         for (i=m-1; i>=0; i--) {
1029           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1030           v  = aa + diag[i] + 1;
1031           vi = aj + diag[i] + 1;
1032           nz = ai[i+1] - diag[i] - 1;
1033           workt = work;
1034           for (j=0; j<nz; j++) {
1035             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1036             workt += bs;
1037           }
1038           arrt = arr;
1039           if (T) {
1040             for (j=0; j<nz; j++) {
1041               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1042               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1043               arrt += bs2;
1044             }
1045             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1046           } else if (kaij->isTI) {
1047             for (j=0; j<nz; j++) {
1048               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1049               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1050               arrt += bs2;
1051             }
1052             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1053           }
1054           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1055           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1056         }
1057       }
1058       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1059     }
1060   }
1061 
1062   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1063   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*===================================================================================*/
1068 
1069 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1070 {
1071   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1072   PetscErrorCode ierr;
1073 
1074   PetscFunctionBegin;
1075   if (!yy) {
1076     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1077   } else {
1078     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1079   }
1080   /* start the scatter */
1081   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1082   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1083   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1084   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1089 {
1090   PetscErrorCode ierr;
1091   PetscFunctionBegin;
1092   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1097 {
1098   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1099   PetscErrorCode  ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 /* ----------------------------------------------------------------*/
1107 
1108 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1109 {
1110   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1111   PetscErrorCode  diag = PETSC_FALSE;
1112   PetscErrorCode  ierr;
1113   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1114   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1115 
1116   PetscFunctionBegin;
1117   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1118   b->getrowactive = PETSC_TRUE;
1119   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1120 
1121   if ((!S) && (!T) && (!b->isTI)) {
1122     if (ncols)    *ncols  = 0;
1123     if (cols)     *cols   = NULL;
1124     if (values)   *values = NULL;
1125     PetscFunctionReturn(0);
1126   }
1127 
1128   if (T || b->isTI) {
1129     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1130     c     = nzaij;
1131     for (i=0; i<nzaij; i++) {
1132       /* check if this row contains a diagonal entry */
1133       if (colsaij[i] == r) {
1134         diag = PETSC_TRUE;
1135         c = i;
1136       }
1137     }
1138   } else nzaij = c = 0;
1139 
1140   /* calculate size of row */
1141   nz = 0;
1142   if (S)            nz += q;
1143   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1144 
1145   if (cols || values) {
1146     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1147     for (i=0; i<q; i++) {
1148       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1149       v[i] = 0.0;
1150     }
1151     if (b->isTI) {
1152       for (i=0; i<nzaij; i++) {
1153         for (j=0; j<q; j++) {
1154           idx[i*q+j] = colsaij[i]*q+j;
1155           v[i*q+j]   = (j==s ? vaij[i] : 0);
1156         }
1157       }
1158     } else if (T) {
1159       for (i=0; i<nzaij; i++) {
1160         for (j=0; j<q; j++) {
1161           idx[i*q+j] = colsaij[i]*q+j;
1162           v[i*q+j]   = vaij[i]*T[s+j*p];
1163         }
1164       }
1165     }
1166     if (S) {
1167       for (j=0; j<q; j++) {
1168         idx[c*q+j] = r*q+j;
1169         v[c*q+j]  += S[s+j*p];
1170       }
1171     }
1172   }
1173 
1174   if (ncols)    *ncols  = nz;
1175   if (cols)     *cols   = idx;
1176   if (values)   *values = v;
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1181 {
1182   PetscErrorCode ierr;
1183 
1184   PetscFunctionBegin;
1185   if (nz) *nz = 0;
1186   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1187   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1192 {
1193   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1194   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1195   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1196   Mat             AIJ     = b->A;
1197   PetscBool       diag    = PETSC_FALSE;
1198   PetscErrorCode  ierr;
1199   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1200   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1201   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1202 
1203   PetscFunctionBegin;
1204   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1205   b->getrowactive = PETSC_TRUE;
1206   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1207   lrow = row - rstart;
1208 
1209   if ((!S) && (!T) && (!b->isTI)) {
1210     if (ncols)    *ncols  = 0;
1211     if (cols)     *cols   = NULL;
1212     if (values)   *values = NULL;
1213     PetscFunctionReturn(0);
1214   }
1215 
1216   r = lrow/p;
1217   s = lrow%p;
1218 
1219   if (T || b->isTI) {
1220     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
1221     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1222     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1223 
1224     c     = ncolsaij + ncolsoaij;
1225     for (i=0; i<ncolsaij; i++) {
1226       /* check if this row contains a diagonal entry */
1227       if (colsaij[i] == r) {
1228         diag = PETSC_TRUE;
1229         c = i;
1230       }
1231     }
1232   } else c = 0;
1233 
1234   /* calculate size of row */
1235   nz = 0;
1236   if (S)            nz += q;
1237   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1238 
1239   if (cols || values) {
1240     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1241     for (i=0; i<q; i++) {
1242       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1243       v[i] = 0.0;
1244     }
1245     if (b->isTI) {
1246       for (i=0; i<ncolsaij; i++) {
1247         for (j=0; j<q; j++) {
1248           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1249           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1250         }
1251       }
1252       for (i=0; i<ncolsoaij; i++) {
1253         for (j=0; j<q; j++) {
1254           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1255           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1256         }
1257       }
1258     } else if (T) {
1259       for (i=0; i<ncolsaij; i++) {
1260         for (j=0; j<q; j++) {
1261           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1262           v[i*q+j]   = vals[i]*T[s+j*p];
1263         }
1264       }
1265       for (i=0; i<ncolsoaij; i++) {
1266         for (j=0; j<q; j++) {
1267           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1268           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1269         }
1270       }
1271     }
1272     if (S) {
1273       for (j=0; j<q; j++) {
1274         idx[c*q+j] = (r+rstart/p)*q+j;
1275         v[c*q+j]  += S[s+j*p];
1276       }
1277     }
1278   }
1279 
1280   if (ncols)  *ncols  = nz;
1281   if (cols)   *cols   = idx;
1282   if (values) *values = v;
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1287 {
1288   PetscErrorCode ierr;
1289   PetscFunctionBegin;
1290   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1291   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1296 {
1297   PetscErrorCode ierr;
1298   Mat            A;
1299 
1300   PetscFunctionBegin;
1301   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1302   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1303   ierr = MatDestroy(&A);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 /* ---------------------------------------------------------------------------------- */
1308 /*@C
1309   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1310 
1311     [I \otimes S + A \otimes T]
1312 
1313   where
1314     S is a dense (p \times q) matrix
1315     T is a dense (p \times q) matrix
1316     A is an AIJ  (n \times n) matrix
1317     I is the identity matrix
1318   The resulting matrix is (np \times nq)
1319 
1320   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1321 
1322   Collective
1323 
1324   Input Parameters:
1325 + A - the AIJ matrix
1326 . p - number of rows in S and T
1327 . q - number of columns in S and T
1328 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1329 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1330 
1331   Output Parameter:
1332 . kaij - the new KAIJ matrix
1333 
1334   Notes:
1335   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1336   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1337 
1338   Level: advanced
1339 
1340 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1341 @*/
1342 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1343 {
1344   PetscErrorCode ierr;
1345   PetscMPIInt    size;
1346 
1347   PetscFunctionBegin;
1348   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1349   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1350   if (size == 1) {
1351     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1352   } else {
1353     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1354   }
1355   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1356   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1357   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1358   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 /*MC
1363   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1364     [I \otimes S + A \otimes T],
1365   where
1366     S is a dense (p \times q) matrix,
1367     T is a dense (p \times q) matrix,
1368     A is an AIJ  (n \times n) matrix,
1369     and I is the identity matrix.
1370   The resulting matrix is (np \times nq).
1371 
1372   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1373 
1374   Notes:
1375   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1376   where x and b are column vectors containing the row-major representations of X and B.
1377 
1378   Level: advanced
1379 
1380 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1381 M*/
1382 
1383 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1384 {
1385   PetscErrorCode ierr;
1386   Mat_MPIKAIJ    *b;
1387   PetscMPIInt    size;
1388 
1389   PetscFunctionBegin;
1390   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1391   A->data  = (void*)b;
1392 
1393   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1394 
1395   A->ops->setup = MatSetUp_KAIJ;
1396 
1397   b->w    = NULL;
1398   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1399   if (size == 1) {
1400     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1401     A->ops->setup               = MatSetUp_KAIJ;
1402     A->ops->destroy             = MatDestroy_SeqKAIJ;
1403     A->ops->view                = MatView_KAIJ;
1404     A->ops->mult                = MatMult_SeqKAIJ;
1405     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1406     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1407     A->ops->getrow              = MatGetRow_SeqKAIJ;
1408     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1409     A->ops->sor                 = MatSOR_SeqKAIJ;
1410   } else {
1411     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1412     A->ops->setup               = MatSetUp_KAIJ;
1413     A->ops->destroy             = MatDestroy_MPIKAIJ;
1414     A->ops->view                = MatView_KAIJ;
1415     A->ops->mult                = MatMult_MPIKAIJ;
1416     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1417     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1418     A->ops->getrow              = MatGetRow_MPIKAIJ;
1419     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1420     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1421   }
1422   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1423   PetscFunctionReturn(0);
1424 }
1425