xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 8229bfc2e272e22dd52541101af2fe6fdaaa77c5)
1 /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
2 /*
3   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4           C = A * B
5           C = P * A * P^T
6 */
7 
8 #include "src/mat/impls/aij/seq/aij.h"
9 #include "src/mat/utils/freespace.h"
10 
11 static int logkey_matmatmult            = 0;
12 static int logkey_matmatmult_symbolic   = 0;
13 static int logkey_matmatmult_numeric    = 0;
14 
15 static int logkey_matapplypapt          = 0;
16 static int logkey_matapplypapt_symbolic = 0;
17 static int logkey_matapplypapt_numeric  = 0;
18 
19 /*
20      MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
21            C = A * B;
22 
23      Note: C is assumed to be uncreated.
24            If this is not the case, Destroy C before calling this routine.
25 */
26 #ifdef USE_INTSORT
27 /*
28 This roution is modified by the one below for better performance.
29 The changes are:
30    -- PetscSortInt() is replace by a linked list
31    -- malloc larger Initial FreeSpace
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
35 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
36 {
37   int            ierr;
38   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
39   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
40   int            aishift=a->indexshift,bishift=b->indexshift;
41   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
42   int            *ci,*cj,*denserow,*sparserow;
43   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
44   int            i,j,k,anzi,brow,bnzj,cnzi;
45   MatScalar      *ca;
46 
47   PetscFunctionBegin;
48   /* some error checking which could be moved into interface layer */
49   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
50   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
51 
52   /* Set up timers */
53   if (!logkey_matmatmult_symbolic) {
54     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
55   }
56   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
57 
58   /* Set up */
59   /* Allocate ci array, arrays for fill computation and */
60   /* free space for accumulating nonzero column info */
61   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
62   ci[0] = 0;
63 
64   ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr);
65   ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr);
66   sparserow = denserow + bn;
67 
68   /* Initial FreeSpace size is nnz(B)=bi[bm] */
69   ierr          = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr);
70   current_space = free_space;
71 
72   /* Determine symbolic info for each row of the product: */
73   for (i=0;i<am;i++) {
74     anzi = ai[i+1] - ai[i];
75     cnzi = 0;
76     for (j=0;j<anzi;j++) {
77       brow = *aj++;
78       bnzj = bi[brow+1] - bi[brow];
79       bjj  = bj + bi[brow];
80       for (k=0;k<bnzj;k++) {
81         /* If column is not marked, mark it in compressed and uncompressed locations. */
82         /* For simplicity, leave uncompressed row unsorted until finished with row, */
83         /* and increment nonzero count for this row. */
84         if (!denserow[bjj[k]]) {
85           denserow[bjj[k]]  = -1;
86           sparserow[cnzi++] = bjj[k];
87         }
88       }
89     }
90 
91     /* sort sparserow */
92     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
93 
94     /* If free space is not available, make more free space */
95     /* Double the amount of total space in the list */
96     if (current_space->local_remaining<cnzi) {
97       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
98     }
99 
100     /* Copy data into free space, and zero out denserow */
101     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
102     current_space->array           += cnzi;
103     current_space->local_used      += cnzi;
104     current_space->local_remaining -= cnzi;
105     for (j=0;j<cnzi;j++) {
106       denserow[sparserow[j]] = 0;
107     }
108     ci[i+1] = ci[i] + cnzi;
109   }
110 
111   /* Column indices are in the list of free space */
112   /* Allocate space for cj, initialize cj, and */
113   /* destroy list of free space and other temporary array(s) */
114   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
115   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
116   ierr = PetscFree(denserow);CHKERRQ(ierr);
117 
118   /* Allocate space for ca */
119   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
120   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
121 
122   /* put together the new matrix */
123   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
124 
125   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
126   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
127   c = (Mat_SeqAIJ *)((*C)->data);
128   c->freedata = PETSC_TRUE;
129   c->nonew    = 0;
130 
131   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
132   PetscFunctionReturn(0);
133 }
134 #endif /*  USE_INTSORT */
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
138 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
139 {
140   int            ierr;
141   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
142   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
143   int            aishift=a->indexshift,bishift=b->indexshift;
144   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
145   int            *ci,*cj,*lnk,idx0,idx,bcol;
146   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
147   int            i,j,k,anzi,brow,bnzj,cnzi;
148   MatScalar      *ca;
149 
150   PetscFunctionBegin;
151   /* some error checking which could be moved into interface layer */
152   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
153   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
154 
155   /* Set up timers */
156   if (!logkey_matmatmult_symbolic) {
157     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
158   }
159   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
160 
161   /* Set up */
162   /* Allocate ci array, arrays for fill computation and */
163   /* free space for accumulating nonzero column info */
164   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
165   ci[0] = 0;
166 
167   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
168   for (i=0; i<bn; i++) lnk[i] = -1;
169 
170   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
171   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
172   current_space = free_space;
173 
174   /* Determine symbolic info for each row of the product: */
175   for (i=0;i<am;i++) {
176     anzi = ai[i+1] - ai[i];
177     cnzi = 0;
178     lnk[bn] = bn;
179     for (j=0;j<anzi;j++) {
180       brow = *aj++;
181       bnzj = bi[brow+1] - bi[brow];
182       bjj  = bj + bi[brow];
183       idx  = bn;
184       for (k=0;k<bnzj;k++) {
185         bcol = bjj[k];
186         if (lnk[bcol] == -1) { /* new col */
187           if (k>0) idx = bjj[k-1];
188           do {
189             idx0 = idx;
190             idx  = lnk[idx0];
191           } while (bcol > idx);
192           lnk[idx0] = bcol;
193           lnk[bcol] = idx;
194           cnzi++;
195         }
196       }
197     }
198 
199     /* If free space is not available, make more free space */
200     /* Double the amount of total space in the list */
201     if (current_space->local_remaining<cnzi) {
202       printf("...%d -th row, double space ...\n",i);
203       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
204     }
205 
206     /* Copy data into free space, and zero out denserow and lnk */
207     idx = bn;
208     for (j=0; j<cnzi; j++){
209       idx0 = idx;
210       idx  = lnk[idx0];
211       *current_space->array++ = idx;
212       lnk[idx0] = -1;
213     }
214     lnk[idx] = -1;
215 
216     current_space->local_used      += cnzi;
217     current_space->local_remaining -= cnzi;
218 
219     ci[i+1] = ci[i] + cnzi;
220   }
221 
222   /* Column indices are in the list of free space */
223   /* Allocate space for cj, initialize cj, and */
224   /* destroy list of free space and other temporary array(s) */
225   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
226   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
227   ierr = PetscFree(lnk);CHKERRQ(ierr);
228 
229   /* Allocate space for ca */
230   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
231   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
232 
233   /* put together the new matrix */
234   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
235 
236   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
237   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
238   c = (Mat_SeqAIJ *)((*C)->data);
239   c->freedata = PETSC_TRUE;
240   c->nonew    = 0;
241 
242   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 /*
247      MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
248            C=A*B;
249      Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ.
250 */
251 #undef __FUNCT__
252 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
253 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
254 {
255   int        ierr,flops=0;
256   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
257   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
258   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
259   int        aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift;
260   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
261   int        an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M;
262   int        i,j,k,anzi,bnzi,cnzi,brow;
263   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
264 
265   PetscFunctionBegin;
266 
267   /* This error checking should be unnecessary if the symbolic was performed */
268   if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
269   if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm);
270   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
271   if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn);
272 
273   /* Set up timers */
274   if (!logkey_matmatmult_numeric) {
275     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
276   }
277   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
278 
279   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
280   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
281   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
282   /* Traverse A row-wise. */
283   /* Build the ith row in C by summing over nonzero columns in A, */
284   /* the rows of B corresponding to nonzeros of A. */
285   for (i=0;i<am;i++) {
286     anzi = ai[i+1] - ai[i];
287     for (j=0;j<anzi;j++) {
288       brow = *aj++;
289       bnzi = bi[brow+1] - bi[brow];
290       bjj  = bj + bi[brow];
291       baj  = ba + bi[brow];
292       for (k=0;k<bnzi;k++) {
293         temp[bjj[k]] += (*aa)*baj[k];
294       }
295       flops += 2*bnzi;
296       aa++;
297     }
298     /* Store row back into C, and re-zero temp */
299     cnzi = ci[i+1] - ci[i];
300     for (j=0;j<cnzi;j++) {
301       ca[j] = temp[cj[j]];
302       temp[cj[j]] = 0.0;
303     }
304     ca += cnzi;
305     cj += cnzi;
306   }
307   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
308   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
309 
310   /* Free temp */
311   ierr = PetscFree(temp);CHKERRQ(ierr);
312   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
313   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
319 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) {
320   int ierr;
321 
322   PetscFunctionBegin;
323   if (!logkey_matmatmult) {
324     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
325   }
326   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
327   ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr);
328   ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
329   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
330   PetscFunctionReturn(0);
331 }
332 
333 
334 /*
335      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
336            C = P * A * P^T;
337 
338      Note: C is assumed to be uncreated.
339            If this is not the case, Destroy C before calling this routine.
340 */
341 #undef __FUNCT__
342 #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ"
343 int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
344   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
345   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
346   int            ierr;
347   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
348   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
349   int            aishift=a->indexshift,pishift=p->indexshift;
350   int            *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
351   int            *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
352   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
353   int            i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
354   MatScalar      *ca;
355 
356   PetscFunctionBegin;
357 
358   /* some error checking which could be moved into interface layer */
359   if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
360   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
361   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
362 
363   /* Set up timers */
364   if (!logkey_matapplypapt_symbolic) {
365     ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
366   }
367   ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr);
368 
369   /* Create ij structure of P^T */
370   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
371 
372   /* Allocate ci array, arrays for fill computation and */
373   /* free space for accumulating nonzero column info */
374   ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr);
375   ci[0] = 0;
376 
377   ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr);
378   ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr);
379   pasparserow  = padenserow  + an;
380   denserow     = pasparserow + an;
381   sparserow    = denserow    + pm;
382 
383   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
384   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
385   ierr          = GetMoreSpace((ai[am]/pn)*pm,&free_space);
386   current_space = free_space;
387 
388   /* Determine fill for each row of C: */
389   for (i=0;i<pm;i++) {
390     pnzi  = pi[i+1] - pi[i];
391     panzi = 0;
392     /* Get symbolic sparse row of PA: */
393     for (j=0;j<pnzi;j++) {
394       arow = *pj++;
395       anzj = ai[arow+1] - ai[arow];
396       ajj  = aj + ai[arow];
397       for (k=0;k<anzj;k++) {
398         if (!padenserow[ajj[k]]) {
399           padenserow[ajj[k]]   = -1;
400           pasparserow[panzi++] = ajj[k];
401         }
402       }
403     }
404     /* Using symbolic row of PA, determine symbolic row of C: */
405     paj    = pasparserow;
406     cnzi   = 0;
407     for (j=0;j<panzi;j++) {
408       ptrow = *paj++;
409       ptnzj = pti[ptrow+1] - pti[ptrow];
410       ptjj  = ptj + pti[ptrow];
411       for (k=0;k<ptnzj;k++) {
412         if (!denserow[ptjj[k]]) {
413           denserow[ptjj[k]] = -1;
414           sparserow[cnzi++] = ptjj[k];
415         }
416       }
417     }
418 
419     /* sort sparse representation */
420     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
421 
422     /* If free space is not available, make more free space */
423     /* Double the amount of total space in the list */
424     if (current_space->local_remaining<cnzi) {
425       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
426     }
427 
428     /* Copy data into free space, and zero out dense row */
429     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
430     current_space->array           += cnzi;
431     current_space->local_used      += cnzi;
432     current_space->local_remaining -= cnzi;
433 
434     for (j=0;j<panzi;j++) {
435       padenserow[pasparserow[j]] = 0;
436     }
437     for (j=0;j<cnzi;j++) {
438       denserow[sparserow[j]] = 0;
439     }
440     ci[i+1] = ci[i] + cnzi;
441   }
442   /* column indices are in the list of free space */
443   /* Allocate space for cj, initialize cj, and */
444   /* destroy list of free space and other temporary array(s) */
445   ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr);
446   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
447   ierr = PetscFree(padenserow);CHKERRQ(ierr);
448 
449   /* Allocate space for ca */
450   ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
451   ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr);
452 
453   /* put together the new matrix */
454   ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr);
455 
456   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
457   /* Since these are PETSc arrays, change flags to free them as necessary. */
458   c = (Mat_SeqAIJ *)((*C)->data);
459   c->freedata = PETSC_TRUE;
460   c->nonew    = 0;
461 
462   /* Clean up. */
463   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
464 
465   ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 /*
470      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
471            C = P * A * P^T;
472      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
473 */
474 #undef __FUNCT__
475 #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ"
476 int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
477   int        ierr,flops=0;
478   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
479   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
480   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
481   int        aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
482   int        *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
483   int        *ci=c->i,*cj=c->j;
484   int        an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
485   int        i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
486   MatScalar  *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
487 
488   PetscFunctionBegin;
489 
490   /* This error checking should be unnecessary if the symbolic was performed */
491   if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
492   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm);
493   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
494   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
495   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn);
496 
497   /* Set up timers */
498   if (!logkey_matapplypapt_numeric) {
499     ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr);
500   }
501   ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr);
502 
503   ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr);
504   ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
505   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
506 
507   paj      = (int *)(paa + an);
508   pajdense = paj + an;
509 
510   for (i=0;i<pm;i++) {
511     /* Form sparse row of P*A */
512     pnzi  = pi[i+1] - pi[i];
513     panzj = 0;
514     for (j=0;j<pnzi;j++) {
515       arow = *pj++;
516       anzj = ai[arow+1] - ai[arow];
517       ajj  = aj + ai[arow];
518       aaj  = aa + ai[arow];
519       for (k=0;k<anzj;k++) {
520         if (!pajdense[ajj[k]]) {
521           pajdense[ajj[k]] = -1;
522           paj[panzj++]     = ajj[k];
523         }
524         paa[ajj[k]] += (*pa)*aaj[k];
525       }
526       flops += 2*anzj;
527       pa++;
528     }
529 
530     /* Sort the j index array for quick sparse axpy. */
531     ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr);
532 
533     /* Compute P*A*P^T using sparse inner products. */
534     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
535     cnzi = ci[i+1] - ci[i];
536     for (j=0;j<cnzi;j++) {
537       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
538       ptcol = *cj++;
539       ptnzj = pi[ptcol+1] - pi[ptcol];
540       ptj   = pjj + pi[ptcol];
541       ptaj  = pta + pi[ptcol];
542       sum   = 0.;
543       k1    = 0;
544       k2    = 0;
545       while ((k1<panzj) && (k2<ptnzj)) {
546         if (paj[k1]==ptj[k2]) {
547           sum += paa[paj[k1++]]*ptaj[k2++];
548         } else if (paj[k1] < ptj[k2]) {
549           k1++;
550         } else /* if (paj[k1] > ptj[k2]) */ {
551           k2++;
552         }
553       }
554       *ca++ = sum;
555     }
556 
557     /* Zero the current row info for P*A */
558     for (j=0;j<panzj;j++) {
559       paa[paj[j]]      = 0.;
560       pajdense[paj[j]] = 0;
561     }
562   }
563 
564   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
565   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
567   ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ"
573 int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
574   int ierr;
575 
576   PetscFunctionBegin;
577   if (!logkey_matapplypapt) {
578     ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr);
579   }
580   ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr);
581   ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
582   ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
583   ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586