xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 123145dfa260b404ec74fab6a7bbe490bcc93c48)
1 /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
2 /*
3   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4           C = A * B
5           C = P * A * P^T
6 */
7 
8 #include "src/mat/impls/aij/seq/aij.h"
9 #include "src/mat/utils/freespace.h"
10 
11 static int logkey_matmatmult            = 0;
12 static int logkey_matmatmult_symbolic   = 0;
13 static int logkey_matmatmult_numeric    = 0;
14 
15 static int logkey_matapplypapt          = 0;
16 static int logkey_matapplypapt_symbolic = 0;
17 static int logkey_matapplypapt_numeric  = 0;
18 
19 /*
20      MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
21            C = A * B;
22 
23      Note: C is assumed to be uncreated.
24            If this is not the case, Destroy C before calling this routine.
25 */
26 #ifdef USE_INTSORT
27 /*
28 This roution is modified by the one below for better performance.
29 The changes are:
30    -- PetscSortInt() is replace by a linked list
31    -- malloc larger Initial FreeSpace
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
35 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
36 {
37   int            ierr;
38   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
39   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
40   int            aishift=a->indexshift,bishift=b->indexshift;
41   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
42   int            *ci,*cj,*denserow,*sparserow;
43   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
44   int            i,j,k,anzi,brow,bnzj,cnzi;
45   MatScalar      *ca;
46 
47   PetscFunctionBegin;
48   /* some error checking which could be moved into interface layer */
49   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
50   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
51 
52   /* Set up timers */
53   if (!logkey_matmatmult_symbolic) {
54     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
55   }
56   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
57 
58   /* Set up */
59   /* Allocate ci array, arrays for fill computation and */
60   /* free space for accumulating nonzero column info */
61   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
62   ci[0] = 0;
63 
64   ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr);
65   ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr);
66   sparserow = denserow + bn;
67 
68   /* Initial FreeSpace size is nnz(B)=bi[bm] */
69   ierr          = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr);
70   current_space = free_space;
71 
72   /* Determine symbolic info for each row of the product: */
73   for (i=0;i<am;i++) {
74     anzi = ai[i+1] - ai[i];
75     cnzi = 0;
76     for (j=0;j<anzi;j++) {
77       brow = *aj++;
78       bnzj = bi[brow+1] - bi[brow];
79       bjj  = bj + bi[brow];
80       for (k=0;k<bnzj;k++) {
81         /* If column is not marked, mark it in compressed and uncompressed locations. */
82         /* For simplicity, leave uncompressed row unsorted until finished with row, */
83         /* and increment nonzero count for this row. */
84         if (!denserow[bjj[k]]) {
85           denserow[bjj[k]]  = -1;
86           sparserow[cnzi++] = bjj[k];
87         }
88       }
89     }
90 
91     /* sort sparserow */
92     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
93 
94     /* If free space is not available, make more free space */
95     /* Double the amount of total space in the list */
96     if (current_space->local_remaining<cnzi) {
97       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
98     }
99 
100     /* Copy data into free space, and zero out denserow */
101     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
102     current_space->array           += cnzi;
103     current_space->local_used      += cnzi;
104     current_space->local_remaining -= cnzi;
105     for (j=0;j<cnzi;j++) {
106       denserow[sparserow[j]] = 0;
107     }
108     ci[i+1] = ci[i] + cnzi;
109   }
110 
111   /* Column indices are in the list of free space */
112   /* Allocate space for cj, initialize cj, and */
113   /* destroy list of free space and other temporary array(s) */
114   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
115   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
116   ierr = PetscFree(denserow);CHKERRQ(ierr);
117 
118   /* Allocate space for ca */
119   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
120   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
121 
122   /* put together the new matrix */
123   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
124 
125   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
126   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
127   c = (Mat_SeqAIJ *)((*C)->data);
128   c->freedata = PETSC_TRUE;
129   c->nonew    = 0;
130 
131   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
132   PetscFunctionReturn(0);
133 }
134 #endif /*  USE_INTSORT */
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
138 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
139 {
140   int            ierr;
141   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
142   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
143   int            aishift=a->indexshift,bishift=b->indexshift;
144   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
145   int            *ci,*cj,*lnk,idx0,idx,bcol;
146   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
147   int            i,j,k,anzi,brow,bnzj,cnzi;
148   MatScalar      *ca;
149   PetscLogDouble t0,t1,etime=0.0;
150 
151   PetscFunctionBegin;
152   /* some error checking which could be moved into interface layer */
153   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
154   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
155 
156   /* Set up timers */
157   if (!logkey_matmatmult_symbolic) {
158     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
159   }
160   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
161 
162   /* Set up */
163   /* Allocate ci array, arrays for fill computation and */
164   /* free space for accumulating nonzero column info */
165   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
166   ci[0] = 0;
167 
168   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
169   for (i=0; i<bn; i++) lnk[i] = -1;
170 
171   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
172   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
173   current_space = free_space;
174 
175   /* Determine symbolic info for each row of the product: */
176   for (i=0;i<am;i++) {
177     anzi = ai[i+1] - ai[i];
178     cnzi = 0;
179     lnk[bn] = bn;
180     for (j=0;j<anzi;j++) {
181       brow = *aj++;
182       bnzj = bi[brow+1] - bi[brow];
183       bjj  = bj + bi[brow];
184       idx  = bn;
185       for (k=0;k<bnzj;k++) {
186         bcol = bjj[k];
187         if (lnk[bcol] == -1) { /* new col */
188           if (k>0) idx = bjj[k-1];
189           do {
190             idx0 = idx;
191             idx  = lnk[idx0];
192           } while (bcol > idx);
193           lnk[idx0] = bcol;
194           lnk[bcol] = idx;
195           cnzi++;
196         }
197       }
198     }
199 
200     /* If free space is not available, make more free space */
201     /* Double the amount of total space in the list */
202     if (current_space->local_remaining<cnzi) {
203       printf("...%d -th row, double space ...\n",i);
204       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
205     }
206 
207     /* Copy data into free space, and zero out denserow and lnk */
208     idx = bn;
209     for (j=0; j<cnzi; j++){
210       idx0 = idx;
211       idx  = lnk[idx0];
212       *current_space->array++ = idx;
213       lnk[idx0] = -1;
214     }
215     lnk[idx] = -1;
216 
217     current_space->local_used      += cnzi;
218     current_space->local_remaining -= cnzi;
219 
220     ci[i+1] = ci[i] + cnzi;
221   }
222 
223   /* Column indices are in the list of free space */
224   /* Allocate space for cj, initialize cj, and */
225   /* destroy list of free space and other temporary array(s) */
226   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
227   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
228   ierr = PetscFree(lnk);CHKERRQ(ierr);
229 
230   /* Allocate space for ca */
231   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
232   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
233 
234   /* put together the new matrix */
235   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
236 
237   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
238   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
239   c = (Mat_SeqAIJ *)((*C)->data);
240   c->freedata = PETSC_TRUE;
241   c->nonew    = 0;
242 
243   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 /*
248      MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
249            C=A*B;
250      Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ.
251 */
252 #undef __FUNCT__
253 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
254 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
255 {
256   int        ierr,flops=0;
257   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
258   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
259   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
260   int        aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift;
261   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
262   int        an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M;
263   int        i,j,k,anzi,bnzi,cnzi,brow;
264   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
265 
266   PetscFunctionBegin;
267 
268   /* This error checking should be unnecessary if the symbolic was performed */
269   if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
270   if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm);
271   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
272   if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn);
273 
274   /* Set up timers */
275   if (!logkey_matmatmult_numeric) {
276     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
277   }
278   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
279 
280   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
281   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
282   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
283   /* Traverse A row-wise. */
284   /* Build the ith row in C by summing over nonzero columns in A, */
285   /* the rows of B corresponding to nonzeros of A. */
286   for (i=0;i<am;i++) {
287     anzi = ai[i+1] - ai[i];
288     for (j=0;j<anzi;j++) {
289       brow = *aj++;
290       bnzi = bi[brow+1] - bi[brow];
291       bjj  = bj + bi[brow];
292       baj  = ba + bi[brow];
293       for (k=0;k<bnzi;k++) {
294         temp[bjj[k]] += (*aa)*baj[k];
295       }
296       flops += 2*bnzi;
297       aa++;
298     }
299     /* Store row back into C, and re-zero temp */
300     cnzi = ci[i+1] - ci[i];
301     for (j=0;j<cnzi;j++) {
302       ca[j] = temp[cj[j]];
303       temp[cj[j]] = 0.0;
304     }
305     ca += cnzi;
306     cj += cnzi;
307   }
308   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
309   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
310 
311   /* Free temp */
312   ierr = PetscFree(temp);CHKERRQ(ierr);
313   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
314   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
320 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) {
321   int ierr;
322 
323   PetscFunctionBegin;
324   if (!logkey_matmatmult) {
325     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
326   }
327   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
328   ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr);
329   ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
330   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
331   PetscFunctionReturn(0);
332 }
333 
334 
335 /*
336      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
337            C = P * A * P^T;
338 
339      Note: C is assumed to be uncreated.
340            If this is not the case, Destroy C before calling this routine.
341 */
342 #undef __FUNCT__
343 #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ"
344 int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
345   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
346   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
347   int            ierr;
348   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
350   int            aishift=a->indexshift,pishift=p->indexshift;
351   int            *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
352   int            *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
353   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
354   int            i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
355   MatScalar      *ca;
356 
357   PetscFunctionBegin;
358 
359   /* some error checking which could be moved into interface layer */
360   if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
361   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
362   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
363 
364   /* Set up timers */
365   if (!logkey_matapplypapt_symbolic) {
366     ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
367   }
368   ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr);
369 
370   /* Create ij structure of P^T */
371   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
372 
373   /* Allocate ci array, arrays for fill computation and */
374   /* free space for accumulating nonzero column info */
375   ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr);
376   ci[0] = 0;
377 
378   ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr);
379   ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr);
380   pasparserow  = padenserow  + an;
381   denserow     = pasparserow + an;
382   sparserow    = denserow    + pm;
383 
384   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
385   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
386   ierr          = GetMoreSpace((ai[am]/pn)*pm,&free_space);
387   current_space = free_space;
388 
389   /* Determine fill for each row of C: */
390   for (i=0;i<pm;i++) {
391     pnzi  = pi[i+1] - pi[i];
392     panzi = 0;
393     /* Get symbolic sparse row of PA: */
394     for (j=0;j<pnzi;j++) {
395       arow = *pj++;
396       anzj = ai[arow+1] - ai[arow];
397       ajj  = aj + ai[arow];
398       for (k=0;k<anzj;k++) {
399         if (!padenserow[ajj[k]]) {
400           padenserow[ajj[k]]   = -1;
401           pasparserow[panzi++] = ajj[k];
402         }
403       }
404     }
405     /* Using symbolic row of PA, determine symbolic row of C: */
406     paj    = pasparserow;
407     cnzi   = 0;
408     for (j=0;j<panzi;j++) {
409       ptrow = *paj++;
410       ptnzj = pti[ptrow+1] - pti[ptrow];
411       ptjj  = ptj + pti[ptrow];
412       for (k=0;k<ptnzj;k++) {
413         if (!denserow[ptjj[k]]) {
414           denserow[ptjj[k]] = -1;
415           sparserow[cnzi++] = ptjj[k];
416         }
417       }
418     }
419 
420     /* sort sparse representation */
421     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
422 
423     /* If free space is not available, make more free space */
424     /* Double the amount of total space in the list */
425     if (current_space->local_remaining<cnzi) {
426       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
427     }
428 
429     /* Copy data into free space, and zero out dense row */
430     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
431     current_space->array           += cnzi;
432     current_space->local_used      += cnzi;
433     current_space->local_remaining -= cnzi;
434 
435     for (j=0;j<panzi;j++) {
436       padenserow[pasparserow[j]] = 0;
437     }
438     for (j=0;j<cnzi;j++) {
439       denserow[sparserow[j]] = 0;
440     }
441     ci[i+1] = ci[i] + cnzi;
442   }
443   /* column indices are in the list of free space */
444   /* Allocate space for cj, initialize cj, and */
445   /* destroy list of free space and other temporary array(s) */
446   ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr);
447   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
448   ierr = PetscFree(padenserow);CHKERRQ(ierr);
449 
450   /* Allocate space for ca */
451   ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
452   ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr);
453 
454   /* put together the new matrix */
455   ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr);
456 
457   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
458   /* Since these are PETSc arrays, change flags to free them as necessary. */
459   c = (Mat_SeqAIJ *)((*C)->data);
460   c->freedata = PETSC_TRUE;
461   c->nonew    = 0;
462 
463   /* Clean up. */
464   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
465 
466   ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr);
467   PetscFunctionReturn(0);
468 }
469 
470 /*
471      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
472            C = P * A * P^T;
473      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
474 */
475 #undef __FUNCT__
476 #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ"
477 int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
478   int        ierr,flops=0;
479   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
480   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
481   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
482   int        aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
483   int        *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
484   int        *ci=c->i,*cj=c->j;
485   int        an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
486   int        i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
487   MatScalar  *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
488 
489   PetscFunctionBegin;
490 
491   /* This error checking should be unnecessary if the symbolic was performed */
492   if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
493   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm);
494   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
495   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
496   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn);
497 
498   /* Set up timers */
499   if (!logkey_matapplypapt_numeric) {
500     ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr);
501   }
502   ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr);
503 
504   ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr);
505   ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
506   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
507 
508   paj      = (int *)(paa + an);
509   pajdense = paj + an;
510 
511   for (i=0;i<pm;i++) {
512     /* Form sparse row of P*A */
513     pnzi  = pi[i+1] - pi[i];
514     panzj = 0;
515     for (j=0;j<pnzi;j++) {
516       arow = *pj++;
517       anzj = ai[arow+1] - ai[arow];
518       ajj  = aj + ai[arow];
519       aaj  = aa + ai[arow];
520       for (k=0;k<anzj;k++) {
521         if (!pajdense[ajj[k]]) {
522           pajdense[ajj[k]] = -1;
523           paj[panzj++]     = ajj[k];
524         }
525         paa[ajj[k]] += (*pa)*aaj[k];
526       }
527       flops += 2*anzj;
528       pa++;
529     }
530 
531     /* Sort the j index array for quick sparse axpy. */
532     ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr);
533 
534     /* Compute P*A*P^T using sparse inner products. */
535     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
536     cnzi = ci[i+1] - ci[i];
537     for (j=0;j<cnzi;j++) {
538       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
539       ptcol = *cj++;
540       ptnzj = pi[ptcol+1] - pi[ptcol];
541       ptj   = pjj + pi[ptcol];
542       ptaj  = pta + pi[ptcol];
543       sum   = 0.;
544       k1    = 0;
545       k2    = 0;
546       while ((k1<panzj) && (k2<ptnzj)) {
547         if (paj[k1]==ptj[k2]) {
548           sum += paa[paj[k1++]]*ptaj[k2++];
549         } else if (paj[k1] < ptj[k2]) {
550           k1++;
551         } else /* if (paj[k1] > ptj[k2]) */ {
552           k2++;
553         }
554       }
555       *ca++ = sum;
556     }
557 
558     /* Zero the current row info for P*A */
559     for (j=0;j<panzj;j++) {
560       paa[paj[j]]      = 0.;
561       pajdense[paj[j]] = 0;
562     }
563   }
564 
565   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
567   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
568   ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ"
574 int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
575   int ierr;
576 
577   PetscFunctionBegin;
578   if (!logkey_matapplypapt) {
579     ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr);
580   }
581   ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr);
582   ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
583   ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
584   ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587