xref: /petsc/src/mat/impls/kaij/kaij.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscErrorCode ierr;
145 
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154 
155   Not collective
156 
157   Input Parameter:
158 . A - the KAIJ matrix
159 
160   Output Parameter:
161 . S - location of pointer to array obtained with MatKAIJGetS()
162 
163   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If NULL is passed, it will not attempt to zero the array pointer.
165 
166   Level: advanced
167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(0);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
178 
179    Not Collective; the entire T is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the KAIJ matrix
183 
184    Output Parameter:
185 +  m - the number of rows in T
186 .  n - the number of columns in T
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
190 
191    Level: advanced
192 
193 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207 
208    Not Collective; the entire T is stored and returned independently on all processes
209 
210    Input Parameter:
211 .  A - the KAIJ matrix
212 
213    Output Parameter:
214 +  m - the number of rows in T
215 .  n - the number of columns in T
216 -  T - the T matrix, in form of a scalar array in column-major format
217 
218    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219 
220    Level: advanced
221 
222 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223 @*/
224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225 {
226   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227   PetscFunctionBegin;
228   if (m) *m = b->p;
229   if (n) *n = b->q;
230   if (T) *T = b->T;
231   PetscFunctionReturn(0);
232 }
233 
234 /*@C
235   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236 
237   Not collective
238 
239   Input Parameter:
240 . A - the KAIJ matrix
241 
242   Output Parameter:
243 . T - location of pointer to array obtained with MatKAIJGetS()
244 
245   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246   If NULL is passed, it will not attempt to zero the array pointer.
247 
248   Level: advanced
249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263 
264   Not collective
265 
266   Input Parameter:
267 . A - the KAIJ matrix
268 
269   Output Parameter:
270 . T - location of pointer to array obtained with MatKAIJGetS()
271 
272   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If NULL is passed, it will not attempt to zero the array pointer.
274 
275   Level: advanced
276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(0);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
287 
288    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
289 
290    Input Parameters:
291 +  A - the KAIJ matrix
292 -  B - the AIJ matrix
293 
294    Notes:
295    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
297 
298    Level: advanced
299 
300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303 {
304   PetscErrorCode ierr;
305   PetscMPIInt    size;
306 
307   PetscFunctionBegin;
308   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
309   if (size == 1) {
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311     a->AIJ = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314     a->A = B;
315   }
316   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the KAIJ matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332    The S matrix is copied, so the user can destroy this array.
333 
334    Level: Advanced
335 
336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337 @*/
338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339 {
340   PetscErrorCode ierr;
341   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = PetscFree(a->S);CHKERRQ(ierr);
345   if (S) {
346     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
347     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
348   } else  a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
357 
358    Logically Collective; the entire T is stored independently on all processes.
359 
360    Input Parameters:
361 +  A - the KAIJ matrix
362 .  p - the number of rows in S
363 .  q - the number of columns in S
364 -  T - the T matrix, in form of a scalar array in column-major format
365 
366    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
367    The T matrix is copied, so the user can destroy this array.
368 
369    Level: Advanced
370 
371 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
372 @*/
373 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
374 {
375   PetscErrorCode ierr;
376   PetscInt       i,j;
377   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
378   PetscBool      isTI = PETSC_FALSE;
379 
380   PetscFunctionBegin;
381   /* check if T is an identity matrix */
382   if (T && (p == q)) {
383     isTI = PETSC_TRUE;
384     for (i=0; i<p; i++) {
385       for (j=0; j<q; j++) {
386         if (i == j) {
387           /* diagonal term must be 1 */
388           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
389         } else {
390           /* off-diagonal term must be 0 */
391           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
392         }
393       }
394     }
395   }
396   a->isTI = isTI;
397 
398   ierr = PetscFree(a->T);CHKERRQ(ierr);
399   if (T && (!isTI)) {
400     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
401     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
402   } else a->T = NULL;
403 
404   a->p = p;
405   a->q = q;
406   PetscFunctionReturn(0);
407 }
408 
409 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
410 {
411   PetscErrorCode ierr;
412   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
413 
414   PetscFunctionBegin;
415   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
416   ierr = PetscFree(b->S);CHKERRQ(ierr);
417   ierr = PetscFree(b->T);CHKERRQ(ierr);
418   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
419   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
420   ierr = PetscFree(A->data);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 PetscErrorCode MatSetUp_KAIJ(Mat A)
425 {
426   PetscErrorCode ierr;
427   PetscInt       n;
428   PetscMPIInt    size;
429   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
430 
431   PetscFunctionBegin;
432   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
433   if (size == 1) {
434     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
435     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
436     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
437     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
438     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
439   } else {
440     Mat_MPIKAIJ *a;
441     Mat_MPIAIJ  *mpiaij;
442     IS          from,to;
443     Vec         gvec;
444     PetscScalar *T;
445     PetscInt    i,j;
446 
447     a = (Mat_MPIKAIJ*)A->data;
448     mpiaij = (Mat_MPIAIJ*)a->A->data;
449     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
450     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
451     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
452     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
453     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
454 
455     if (a->isTI) {
456       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
457        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
458        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
459        * to pass in. */
460       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
461       for (i=0; i<a->p; i++) {
462         for (j=0; j<a->q; j++) {
463           if (i==j) T[i+j*a->p] = 1.0;
464           else      T[i+j*a->p] = 0.0;
465         }
466       }
467     } else T = a->T;
468     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
469     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
470     if (a->isTI) {
471       ierr = PetscFree(T);CHKERRQ(ierr);
472     }
473 
474     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
475     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
476     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
477     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
478     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
479 
480     /* create two temporary Index sets for build scatter gather */
481     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
482     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
483 
484     /* create temporary global vector to generate scatter context */
485     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
486 
487     /* generate the scatter context */
488     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
489 
490     ierr = ISDestroy(&from);CHKERRQ(ierr);
491     ierr = ISDestroy(&to);CHKERRQ(ierr);
492     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
493   }
494 
495   A->assembled = PETSC_TRUE;
496   PetscFunctionReturn(0);
497 }
498 
499 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
500 {
501   PetscViewerFormat format;
502   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
503   Mat               B;
504   PetscInt          i;
505   PetscErrorCode    ierr;
506   PetscBool         ismpikaij;
507 
508   PetscFunctionBegin;
509   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
510   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
511   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
512     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
513 
514     /* Print appropriate details for S. */
515     if (!a->S) {
516       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
517     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
518       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
519       for (i=0; i<(a->p * a->q); i++) {
520 #if defined(PETSC_USE_COMPLEX)
521         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
522 #else
523         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
524 #endif
525       }
526       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
527     }
528 
529     /* Print appropriate details for T. */
530     if (a->isTI) {
531       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
532     } else if (!a->T) {
533       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
534     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
535       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
536       for (i=0; i<(a->p * a->q); i++) {
537 #if defined(PETSC_USE_COMPLEX)
538         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
539 #else
540         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
541 #endif
542       }
543       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
544     }
545 
546     /* Now print details for the AIJ matrix, using the AIJ viewer. */
547     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
548     if (ismpikaij) {
549       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
550       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
551     } else {
552       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
553     }
554 
555   } else {
556     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
557     if (ismpikaij) {
558       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
559     } else {
560       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
561     }
562     ierr = MatView(B,viewer);CHKERRQ(ierr);
563     ierr = MatDestroy(&B);CHKERRQ(ierr);
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
569 {
570   PetscErrorCode ierr;
571   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
572 
573   PetscFunctionBegin;
574   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
575   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
576   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
577   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
578   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
579   ierr = PetscFree(b->S);CHKERRQ(ierr);
580   ierr = PetscFree(b->T);CHKERRQ(ierr);
581   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
582   ierr = PetscFree(A->data);CHKERRQ(ierr);
583   PetscFunctionReturn(0);
584 }
585 
586 /* --------------------------------------------------------------------------------------*/
587 
588 /* zz = yy + Axx */
589 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
590 {
591   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
592   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
593   const PetscScalar *s = b->S, *t = b->T;
594   const PetscScalar *x,*v,*bx;
595   PetscScalar       *y,*sums;
596   PetscErrorCode    ierr;
597   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
598   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
599 
600   PetscFunctionBegin;
601   if (!yy) {
602     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
603   } else {
604     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
605   }
606   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
607 
608   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
609   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
610   idx  = a->j;
611   v    = a->a;
612   ii   = a->i;
613 
614   if (b->isTI) {
615     for (i=0; i<m; i++) {
616       jrow = ii[i];
617       n    = ii[i+1] - jrow;
618       sums = y + p*i;
619       for (j=0; j<n; j++) {
620         for (k=0; k<p; k++) {
621           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
622         }
623       }
624     }
625     ierr = PetscLogFlops((a->nz)*3*p);CHKERRQ(ierr);
626   } else if (t) {
627     for (i=0; i<m; i++) {
628       jrow = ii[i];
629       n    = ii[i+1] - jrow;
630       sums = y + p*i;
631       for (j=0; j<n; j++) {
632         for (k=0; k<p; k++) {
633           for (l=0; l<q; l++) {
634             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
635           }
636         }
637       }
638     }
639     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
640      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
641      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
642      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
643     ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
644   }
645   if (s) {
646     for (i=0; i<m; i++) {
647       sums = y + p*i;
648       bx   = x + q*i;
649       if (i < b->AIJ->cmap->n) {
650         for (j=0; j<q; j++) {
651           for (k=0; k<p; k++) {
652             sums[k] += s[k+j*p]*bx[j];
653           }
654         }
655       }
656     }
657     ierr = PetscLogFlops(m*2*p*q);CHKERRQ(ierr);
658   }
659 
660   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
661   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
666 {
667   PetscErrorCode ierr;
668   PetscFunctionBegin;
669   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
670   PetscFunctionReturn(0);
671 }
672 
673 #include <petsc/private/kernels/blockinvert.h>
674 
675 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
676 {
677   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
678   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
679   const PetscScalar *S  = b->S;
680   const PetscScalar *T  = b->T;
681   const PetscScalar *v  = a->a;
682   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
683   PetscErrorCode    ierr;
684   PetscInt          i,j,*v_pivots,dof,dof2;
685   PetscScalar       *diag,aval,*v_work;
686 
687   PetscFunctionBegin;
688   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
689   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
690 
691   dof  = p;
692   dof2 = dof*dof;
693 
694   if (b->ibdiagvalid) {
695     if (values) *values = b->ibdiag;
696     PetscFunctionReturn(0);
697   }
698   if (!b->ibdiag) {
699     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
700     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
701   }
702   if (values) *values = b->ibdiag;
703   diag = b->ibdiag;
704 
705   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
706   for (i=0; i<m; i++) {
707     if (S) {
708       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
709     } else {
710       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
711     }
712     if (b->isTI) {
713       aval = 0;
714       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
715       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
716     } else if (T) {
717       aval = 0;
718       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
719       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
720     }
721     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
722     diag += dof2;
723   }
724   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
725 
726   b->ibdiagvalid = PETSC_TRUE;
727   PetscFunctionReturn(0);
728 }
729 
730 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
731 {
732   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
733 
734   PetscFunctionBegin;
735   *B = kaij->AIJ;
736   PetscFunctionReturn(0);
737 }
738 
739 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
740 {
741   PetscErrorCode    ierr;
742   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
743   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
744   const PetscScalar *aa = a->a, *T = kaij->T, *v;
745   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
746   const PetscScalar *b, *xb, *idiag;
747   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
748   PetscInt          i, j, k, i2, bs, bs2, nz;
749 
750   PetscFunctionBegin;
751   its = its*lits;
752   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
753   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
754   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
755   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
756   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
757   else        {bs = p; bs2 = bs*bs; }
758 
759   if (!m) PetscFunctionReturn(0);
760 
761   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
762   idiag = kaij->ibdiag;
763   diag  = a->diag;
764 
765   if (!kaij->sor.setup) {
766     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
767     kaij->sor.setup = PETSC_TRUE;
768   }
769   y     = kaij->sor.y;
770   w     = kaij->sor.w;
771   work  = kaij->sor.work;
772   t     = kaij->sor.t;
773   arr   = kaij->sor.arr;
774 
775   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
776   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
777 
778   if (flag & SOR_ZERO_INITIAL_GUESS) {
779     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
780       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
781       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
782       i2     =  bs;
783       idiag  += bs2;
784       for (i=1; i<m; i++) {
785         v  = aa + ai[i];
786         vi = aj + ai[i];
787         nz = diag[i] - ai[i];
788 
789         if (T) {                /* b - T (Arow * x) */
790           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
791           for (j=0; j<nz; j++) {
792             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
793           }
794           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
795           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
796         } else if (kaij->isTI) {
797           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
798           for (j=0; j<nz; j++) {
799             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
800           }
801         } else {
802           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
803         }
804 
805         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
806         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
807 
808         idiag += bs2;
809         i2    += bs;
810       }
811       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
812       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
813       xb = t;
814     } else xb = b;
815     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
816       idiag = kaij->ibdiag+bs2*(m-1);
817       i2    = bs * (m-1);
818       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
819       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
820       i2    -= bs;
821       idiag -= bs2;
822       for (i=m-2; i>=0; i--) {
823         v  = aa + diag[i] + 1 ;
824         vi = aj + diag[i] + 1;
825         nz = ai[i+1] - diag[i] - 1;
826 
827         if (T) {                /* FIXME: This branch untested */
828           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
829           /* copy all rows of x that are needed into contiguous space */
830           workt = work;
831           for (j=0; j<nz; j++) {
832             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
833             workt += bs;
834           }
835           arrt = arr;
836           for (j=0; j<nz; j++) {
837             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
838             for (k=0; k<bs2; k++) arrt[k] *= v[j];
839             arrt += bs2;
840           }
841           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
842         } else if (kaij->isTI) {
843           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
844           for (j=0; j<nz; j++) {
845             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
846           }
847         }
848 
849         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
850         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
851 
852         idiag -= bs2;
853         i2    -= bs;
854       }
855       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
856     }
857     its--;
858   }
859   while (its--) {               /* FIXME: This branch not updated */
860     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
861       i2     =  0;
862       idiag  = kaij->ibdiag;
863       for (i=0; i<m; i++) {
864         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
865 
866         v  = aa + ai[i];
867         vi = aj + ai[i];
868         nz = diag[i] - ai[i];
869         workt = work;
870         for (j=0; j<nz; j++) {
871           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
872           workt += bs;
873         }
874         arrt = arr;
875         if (T) {
876           for (j=0; j<nz; j++) {
877             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
878             for (k=0; k<bs2; k++) arrt[k] *= v[j];
879             arrt += bs2;
880           }
881           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
882         } else if (kaij->isTI) {
883           for (j=0; j<nz; j++) {
884             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
885             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
886             arrt += bs2;
887           }
888           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
889         }
890         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
891 
892         v  = aa + diag[i] + 1;
893         vi = aj + diag[i] + 1;
894         nz = ai[i+1] - diag[i] - 1;
895         workt = work;
896         for (j=0; j<nz; j++) {
897           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
898           workt += bs;
899         }
900         arrt = arr;
901         if (T) {
902           for (j=0; j<nz; j++) {
903             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
904             for (k=0; k<bs2; k++) arrt[k] *= v[j];
905             arrt += bs2;
906           }
907           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
908         } else if (kaij->isTI) {
909           for (j=0; j<nz; j++) {
910             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
911             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
912             arrt += bs2;
913           }
914           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
915         }
916 
917         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
918         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
919 
920         idiag += bs2;
921         i2    += bs;
922       }
923       xb = t;
924     }
925     else xb = b;
926     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
927       idiag = kaij->ibdiag+bs2*(m-1);
928       i2    = bs * (m-1);
929       if (xb == b) {
930         for (i=m-1; i>=0; i--) {
931           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
932 
933           v  = aa + ai[i];
934           vi = aj + ai[i];
935           nz = diag[i] - ai[i];
936           workt = work;
937           for (j=0; j<nz; j++) {
938             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
939             workt += bs;
940           }
941           arrt = arr;
942           if (T) {
943             for (j=0; j<nz; j++) {
944               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
945               for (k=0; k<bs2; k++) arrt[k] *= v[j];
946               arrt += bs2;
947             }
948             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
949           } else if (kaij->isTI) {
950             for (j=0; j<nz; j++) {
951               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
952               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
953               arrt += bs2;
954             }
955             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
956           }
957 
958           v  = aa + diag[i] + 1;
959           vi = aj + diag[i] + 1;
960           nz = ai[i+1] - diag[i] - 1;
961           workt = work;
962           for (j=0; j<nz; j++) {
963             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
964             workt += bs;
965           }
966           arrt = arr;
967           if (T) {
968             for (j=0; j<nz; j++) {
969               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
970               for (k=0; k<bs2; k++) arrt[k] *= v[j];
971               arrt += bs2;
972             }
973             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
974           } else if (kaij->isTI) {
975             for (j=0; j<nz; j++) {
976               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
977               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
978               arrt += bs2;
979             }
980             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
981           }
982 
983           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
984           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
985         }
986       } else {
987         for (i=m-1; i>=0; i--) {
988           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
989           v  = aa + diag[i] + 1;
990           vi = aj + diag[i] + 1;
991           nz = ai[i+1] - diag[i] - 1;
992           workt = work;
993           for (j=0; j<nz; j++) {
994             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
995             workt += bs;
996           }
997           arrt = arr;
998           if (T) {
999             for (j=0; j<nz; j++) {
1000               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1001               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1002               arrt += bs2;
1003             }
1004             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1005           } else if (kaij->isTI) {
1006             for (j=0; j<nz; j++) {
1007               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1008               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1009               arrt += bs2;
1010             }
1011             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1012           }
1013           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1014           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1015         }
1016       }
1017       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1018     }
1019   }
1020 
1021   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1022   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 /*===================================================================================*/
1027 
1028 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1029 {
1030   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   if (!yy) {
1035     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1036   } else {
1037     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1038   }
1039   /* start the scatter */
1040   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1042   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1048 {
1049   PetscErrorCode ierr;
1050   PetscFunctionBegin;
1051   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1056 {
1057   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1058   PetscErrorCode  ierr;
1059 
1060   PetscFunctionBegin;
1061   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 /* ----------------------------------------------------------------*/
1066 
1067 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1068 {
1069   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1070   PetscErrorCode  diag = PETSC_FALSE;
1071   PetscErrorCode  ierr;
1072   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1073   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1074 
1075   PetscFunctionBegin;
1076   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1077   b->getrowactive = PETSC_TRUE;
1078   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1079 
1080   if ((!S) && (!T) && (!b->isTI)) {
1081     if (ncols)    *ncols  = 0;
1082     if (cols)     *cols   = NULL;
1083     if (values)   *values = NULL;
1084     PetscFunctionReturn(0);
1085   }
1086 
1087   if (T || b->isTI) {
1088     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1089     c     = nzaij;
1090     for (i=0; i<nzaij; i++) {
1091       /* check if this row contains a diagonal entry */
1092       if (colsaij[i] == r) {
1093         diag = PETSC_TRUE;
1094         c = i;
1095       }
1096     }
1097   } else nzaij = c = 0;
1098 
1099   /* calculate size of row */
1100   nz = 0;
1101   if (S)            nz += q;
1102   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1103 
1104   if (cols || values) {
1105     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1106     for (i=0; i<q; i++) {
1107       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1108       v[i] = 0.0;
1109     }
1110     if (b->isTI) {
1111       for (i=0; i<nzaij; i++) {
1112         for (j=0; j<q; j++) {
1113           idx[i*q+j] = colsaij[i]*q+j;
1114           v[i*q+j]   = (j==s ? vaij[i] : 0);
1115         }
1116       }
1117     } else if (T) {
1118       for (i=0; i<nzaij; i++) {
1119         for (j=0; j<q; j++) {
1120           idx[i*q+j] = colsaij[i]*q+j;
1121           v[i*q+j]   = vaij[i]*T[s+j*p];
1122         }
1123       }
1124     }
1125     if (S) {
1126       for (j=0; j<q; j++) {
1127         idx[c*q+j] = r*q+j;
1128         v[c*q+j]  += S[s+j*p];
1129       }
1130     }
1131   }
1132 
1133   if (ncols)    *ncols  = nz;
1134   if (cols)     *cols   = idx;
1135   if (values)   *values = v;
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1140 {
1141   PetscErrorCode ierr;
1142   PetscFunctionBegin;
1143   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1144   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1149 {
1150   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1151   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1152   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1153   Mat             AIJ     = b->A;
1154   PetscBool       diag    = PETSC_FALSE;
1155   PetscErrorCode  ierr;
1156   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1157   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1158   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1159 
1160   PetscFunctionBegin;
1161   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1162   b->getrowactive = PETSC_TRUE;
1163   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1164   lrow = row - rstart;
1165 
1166   if ((!S) && (!T) && (!b->isTI)) {
1167     if (ncols)    *ncols  = 0;
1168     if (cols)     *cols   = NULL;
1169     if (values)   *values = NULL;
1170     PetscFunctionReturn(0);
1171   }
1172 
1173   r = lrow/p;
1174   s = lrow%p;
1175 
1176   if (T || b->isTI) {
1177     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
1178     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1179     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1180 
1181     c     = ncolsaij + ncolsoaij;
1182     for (i=0; i<ncolsaij; i++) {
1183       /* check if this row contains a diagonal entry */
1184       if (colsaij[i] == r) {
1185         diag = PETSC_TRUE;
1186         c = i;
1187       }
1188     }
1189   } else c = 0;
1190 
1191   /* calculate size of row */
1192   nz = 0;
1193   if (S)            nz += q;
1194   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1195 
1196   if (cols || values) {
1197     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1198     for (i=0; i<q; i++) {
1199       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1200       v[i] = 0.0;
1201     }
1202     if (b->isTI) {
1203       for (i=0; i<ncolsaij; i++) {
1204         for (j=0; j<q; j++) {
1205           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1206           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1207         }
1208       }
1209       for (i=0; i<ncolsoaij; i++) {
1210         for (j=0; j<q; j++) {
1211           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1212           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1213         }
1214       }
1215     } else if (T) {
1216       for (i=0; i<ncolsaij; i++) {
1217         for (j=0; j<q; j++) {
1218           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1219           v[i*q+j]   = vals[i]*T[s+j*p];
1220         }
1221       }
1222       for (i=0; i<ncolsoaij; i++) {
1223         for (j=0; j<q; j++) {
1224           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1225           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1226         }
1227       }
1228     }
1229     if (S) {
1230       for (j=0; j<q; j++) {
1231         idx[c*q+j] = (r+rstart/p)*q+j;
1232         v[c*q+j]  += S[s+j*p];
1233       }
1234     }
1235   }
1236 
1237   if (ncols)  *ncols  = nz;
1238   if (cols)   *cols   = idx;
1239   if (values) *values = v;
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1244 {
1245   PetscErrorCode ierr;
1246   PetscFunctionBegin;
1247   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1248   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1253 {
1254   PetscErrorCode ierr;
1255   Mat            A;
1256 
1257   PetscFunctionBegin;
1258   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1259   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1260   ierr = MatDestroy(&A);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 /* ---------------------------------------------------------------------------------- */
1265 /*@C
1266   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1267 
1268     [I \otimes S + A \otimes T]
1269 
1270   where
1271     S is a dense (p \times q) matrix
1272     T is a dense (p \times q) matrix
1273     A is an AIJ  (n \times n) matrix
1274     I is the identity matrix
1275   The resulting matrix is (np \times nq)
1276 
1277   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1278 
1279   Collective
1280 
1281   Input Parameters:
1282 + A - the AIJ matrix
1283 . p - number of rows in S and T
1284 . q - number of columns in S and T
1285 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1286 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1287 
1288   Output Parameter:
1289 . kaij - the new KAIJ matrix
1290 
1291   Notes:
1292   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1293   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1294 
1295   Level: advanced
1296 
1297 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1298 @*/
1299 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1300 {
1301   PetscErrorCode ierr;
1302   PetscMPIInt    size;
1303 
1304   PetscFunctionBegin;
1305   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1306   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1307   if (size == 1) {
1308     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1309   } else {
1310     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1311   }
1312   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1313   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1314   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1315   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 /*MC
1320   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1321     [I \otimes S + A \otimes T],
1322   where
1323     S is a dense (p \times q) matrix,
1324     T is a dense (p \times q) matrix,
1325     A is an AIJ  (n \times n) matrix,
1326     and I is the identity matrix.
1327   The resulting matrix is (np \times nq).
1328 
1329   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1330 
1331   Notes:
1332   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1333   where x and b are column vectors containing the row-major representations of X and B.
1334 
1335   Level: advanced
1336 
1337 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1338 M*/
1339 
1340 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1341 {
1342   PetscErrorCode ierr;
1343   Mat_MPIKAIJ    *b;
1344   PetscMPIInt    size;
1345 
1346   PetscFunctionBegin;
1347   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1348   A->data  = (void*)b;
1349 
1350   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1351 
1352   A->ops->setup = MatSetUp_KAIJ;
1353 
1354   b->w    = 0;
1355   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1356   if (size == 1) {
1357     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1358     A->ops->setup               = MatSetUp_KAIJ;
1359     A->ops->destroy             = MatDestroy_SeqKAIJ;
1360     A->ops->view                = MatView_KAIJ;
1361     A->ops->mult                = MatMult_SeqKAIJ;
1362     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1363     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1364     A->ops->getrow              = MatGetRow_SeqKAIJ;
1365     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1366     A->ops->sor                 = MatSOR_SeqKAIJ;
1367   } else {
1368     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1369     A->ops->setup               = MatSetUp_KAIJ;
1370     A->ops->destroy             = MatDestroy_MPIKAIJ;
1371     A->ops->view                = MatView_KAIJ;
1372     A->ops->mult                = MatMult_MPIKAIJ;
1373     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1374     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1375     A->ops->getrow              = MatGetRow_MPIKAIJ;
1376     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1377     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1378   }
1379   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1380   PetscFunctionReturn(0);
1381 }
1382