xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 958c9bcce270cdc364c8832dcd56bbb1c4a38e24)
1 /*
2   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 /*
11   Add a index set into a sorted linked list
12   input:
13     nidx      - number of input indices
14     indices   - interger array
15     idx_head  - the header of the list
16     idx_unset - the value indicating the entry in the list is not set yet
17     lnk       - linked list(an integer array) that is created
18   output:
19     nlnk      - number of newly added indices
20     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
21  */
22 #undef __FUNCT__
23 #define __FUNCT__ "LnklistAdd"
24 int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk)
25 {
26   int i,idx,lidx,entry,n;
27 
28   PetscFunctionBegin;
29   n    = 0;
30   lidx = idx_head;
31   i    = nidx;
32   while (i){
33     /* assume indices are almost in increasing order, starting from its end saves computation */
34     entry = indices[--i];
35     if (lnk[entry] == idx_unset) { /* new entry */
36       do {
37         idx = lidx;
38         lidx  = lnk[idx];
39       } while (entry > lidx);
40       lnk[idx] = entry;
41       lnk[entry] = lidx;
42       n++;
43     }
44   }
45   *nlnk = n;
46   PetscFunctionReturn(0);
47 }
48 
49 
50 static int logkey_matmatmult          = 0;
51 static int logkey_matmatmult_symbolic = 0;
52 static int logkey_matmatmult_numeric  = 0;
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatMatMult"
56 /*@
57    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
58 
59    Collective on Mat
60 
61    Input Parameters:
62 +  A - the left matrix
63 -  B - the right matrix
64 
65    Output Parameters:
66 .  C - the product matrix
67 
68    Notes:
69    C will be created and must be destroyed by the user with MatDestroy().
70 
71    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
72    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
73 
74    Level: intermediate
75 
76 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
77 @*/
78 int MatMatMult(Mat A,Mat B, Mat *C)
79 {
80   int  ierr;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
84   PetscValidType(A,1);
85   MatPreallocated(A);
86   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
87   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
88 
89   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
90   PetscValidType(B,2);
91   MatPreallocated(B);
92   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
93   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
94 
95   PetscValidPointer(C,3);
96 
97   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
98 
99   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
100   ierr = (*A->ops->matmult)(A,B,C);CHKERRQ(ierr);
101   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
102 
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
108 int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B, Mat *C)
109 {
110   Mat           *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,*cseq,C_seq,C_mpi;
111   Mat_MPIAIJ    *a = (Mat_MPIAIJ*)A->data;
112   Mat_SeqAIJ    *c;
113   MatScalar     *ca;
114   int           ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap,*ci,*cj,grow,*d_nnz,*o_nnz;
115   IS            isrow,iscol;
116 
117   PetscFunctionBegin;
118   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
119   start = a->cstart;
120   cmap  = a->garray;
121   nzA   = a->A->n;
122   nzB   = a->B->n;
123   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
124   ncols = 0;
125   for (i=0; i<nzB; i++) {
126     if (cmap[i] < start) idx[ncols++] = cmap[i];
127     else break;
128   }
129   imark = i;
130   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
131   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
132   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */
133   ierr = PetscFree(idx);CHKERRQ(ierr);
134   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr);
135   ierr = MatGetSubMatrices(B,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&bseq);CHKERRQ(ierr);
136   ierr = ISDestroy(iscol);CHKERRQ(ierr);
137   B_seq = bseq[0];
138 
139   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
140   start = a->rstart; end = a->rend;
141   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */
142   ierr = MatGetSubMatrices(A,1,&iscol,&isrow,MAT_INITIAL_MATRIX,&aseq);CHKERRQ(ierr);
143   ierr = ISDestroy(isrow);CHKERRQ(ierr);
144   ierr = ISDestroy(iscol);CHKERRQ(ierr);
145   A_seq = aseq[0];
146 
147   /* compute C_seq = A_seq * B_seq */
148   ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq, B_seq, &C_seq);CHKERRQ(ierr);
149   /*
150   int rank;
151   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
152   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n);
153   */
154   ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr);
155   ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr);
156 
157   /* create a mpi matrix C_mpi that has C_seq as its local entries */
158   ierr = MatCreate(A->comm,C_seq->m,PETSC_DECIDE,PETSC_DECIDE,C_seq->N,&C_mpi);CHKERRQ(ierr);
159   ierr = MatSetType(C_mpi,MATMPIAIJ);CHKERRQ(ierr);
160 
161   c  = (Mat_SeqAIJ*)C_seq->data;
162   ci = c->i; cj = c->j; ca = c->a;
163   ierr = PetscMalloc((2*C_seq->m+1)*sizeof(int),&d_nnz);CHKERRQ(ierr);
164   o_nnz = d_nnz + C_seq->m;
165   nzA   = end-start;    /* max nonezero cols of the local diagonal part of C_mpi */
166   nzB   = C_seq->n-nzA; /* max nonezero cols of the local off-diagonal part of C_mpi */
167   for (i=0; i< C_seq->m; i++){
168     ncols = ci[i+1] - ci[i];
169     d_nnz[i] = PetscMin(ncols,nzA);
170     o_nnz[i] = PetscMin(ncols,nzB);
171   }
172   ierr = MatMPIAIJSetPreallocation(C_mpi,PETSC_DECIDE,d_nnz,PETSC_DECIDE,o_nnz);CHKERRQ(ierr);
173   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
174 
175   /* set row values of C_mpi */
176   for (i=0; i< C_seq->m; i++){
177     grow  = start + i;
178     ncols = ci[i+1] - ci[i];
179     ierr = MatSetValues(C_mpi,1,&grow,ncols,cj+ci[i],ca+ci[i],INSERT_VALUES);CHKERRQ(ierr);
180   }
181   ierr = MatAssemblyBegin(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
182   ierr = MatAssemblyEnd(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
183   ierr = MatDestroy(C_seq);CHKERRQ(ierr);
184 
185   *C = C_mpi;
186   PetscFunctionReturn(0);
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
191 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) {
192   int ierr;
193   char symfunct[80],numfunct[80];
194   int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat);
195 
196   PetscFunctionBegin;
197   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
198   /* When other implementations exist, attack the multiple dispatch problem. */
199   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
200   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
201   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
202   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
203   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
204 
205   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
206   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
207   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
208   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
209   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
210 
211   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
212   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
213   ierr = (*numeric)(A,B,*C);CHKERRQ(ierr);
214   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatMatMultSymbolic"
220 /*@
221    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
222    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
223 
224    Collective on Mat
225 
226    Input Parameters:
227 +  A - the left matrix
228 -  B - the right matrix
229 
230    Output Parameters:
231 .  C - the matrix containing the ij structure of product matrix
232 
233    Notes:
234    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
235 
236    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
237 
238    Level: intermediate
239 
240 .seealso: MatMatMult(),MatMatMultNumeric()
241 @*/
242 int MatMatMultSymbolic(Mat A,Mat B,Mat *C) {
243   /* Perhaps this "interface" routine should be moved into the interface directory.*/
244   /* To facilitate implementations with varying types, QueryFunction is used.*/
245   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
246   int  ierr;
247   char symfunct[80];
248   int  (*symbolic)(Mat,Mat,Mat *);
249 
250   PetscFunctionBegin;
251   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
252   PetscValidType(A,1);
253   MatPreallocated(A);
254   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
255   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
256 
257   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
258   PetscValidType(B,2);
259   MatPreallocated(B);
260   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
261   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
262   PetscValidPointer(C,3);
263 
264 
265   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
266 
267   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
268   /* When other implementations exist, attack the multiple dispatch problem. */
269   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
270   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
271   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
272   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
273   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
274   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
275 
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
281 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
282 {
283   int            ierr;
284   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
285   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
286   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
287   int            *ci,*cj,*lnk,idx0,idx;
288   int            am=A->M,bn=B->N,bm=B->M;
289   int            i,j,anzi,brow,bnzj,cnzi,nlnk;
290   MatScalar      *ca;
291 
292   PetscFunctionBegin;
293   /* Start timers */
294   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
295   /* Set up */
296   /* Allocate ci array, arrays for fill computation and */
297   /* free space for accumulating nonzero column info */
298   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
299   ci[0] = 0;
300 
301   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
302   for (i=0; i<bn; i++) lnk[i] = -1;
303 
304   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
305   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
306   current_space = free_space;
307 
308   /* Determine symbolic info for each row of the product: */
309   for (i=0;i<am;i++) {
310     anzi = ai[i+1] - ai[i];
311     cnzi = 0;
312     lnk[bn] = bn;
313     j       = anzi;
314     aj      = a->j + ai[i];
315     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
316       j--;
317       brow = *(aj + j);
318       bnzj = bi[brow+1] - bi[brow];
319       bjj  = bj + bi[brow];
320       /* add non-zero cols of B into the sorted linked list lnk */
321       ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr);
322       cnzi += nlnk;
323     }
324 
325     /* If free space is not available, make more free space */
326     /* Double the amount of total space in the list */
327     if (current_space->local_remaining<cnzi) {
328       printf("...%d -th row, double space ...\n",i);
329       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
330     }
331 
332     /* Copy data into free space, and zero out denserow and lnk */
333     idx = bn;
334     for (j=0; j<cnzi; j++){
335       idx0 = idx;
336       idx  = lnk[idx0];
337       *current_space->array++ = idx;
338       lnk[idx0] = -1;
339     }
340     lnk[idx] = -1;
341 
342     current_space->local_used      += cnzi;
343     current_space->local_remaining -= cnzi;
344 
345     ci[i+1] = ci[i] + cnzi;
346   }
347 
348   /* Column indices are in the list of free space */
349   /* Allocate space for cj, initialize cj, and */
350   /* destroy list of free space and other temporary array(s) */
351   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
352   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
353   ierr = PetscFree(lnk);CHKERRQ(ierr);
354 
355   /* Allocate space for ca */
356   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
357   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
358 
359   /* put together the new matrix */
360   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
361 
362   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
363   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
364   c = (Mat_SeqAIJ *)((*C)->data);
365   c->freedata = PETSC_TRUE;
366   c->nonew    = 0;
367 
368   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 
372 #undef __FUNCT__
373 #define __FUNCT__ "MatMatMultNumeric"
374 /*@
375    MatMatMultNumeric - Performs the numeric matrix-matrix product.
376    Call this routine after first calling MatMatMultSymbolic().
377 
378    Collective on Mat
379 
380    Input Parameters:
381 +  A - the left matrix
382 -  B - the right matrix
383 
384    Output Parameters:
385 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
386 
387    Notes:
388    C must have been created with MatMatMultSymbolic.
389 
390    This routine is currently only implemented for SeqAIJ type matrices.
391 
392    Level: intermediate
393 
394 .seealso: MatMatMult(),MatMatMultSymbolic()
395 @*/
396 int MatMatMultNumeric(Mat A,Mat B,Mat C){
397   /* Perhaps this "interface" routine should be moved into the interface directory.*/
398   /* To facilitate implementations with varying types, QueryFunction is used.*/
399   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
400   int ierr;
401   char numfunct[80];
402   int (*numeric)(Mat,Mat,Mat);
403 
404   PetscFunctionBegin;
405 
406   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
407   PetscValidType(A,1);
408   MatPreallocated(A);
409   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
410   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
411 
412   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
413   PetscValidType(B,2);
414   MatPreallocated(B);
415   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
416   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
417 
418   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
419   PetscValidType(C,3);
420   MatPreallocated(C);
421   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
422   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
423 
424   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
425   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
426   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
427 
428   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
429   /* When other implementations exist, attack the multiple dispatch problem. */
430   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
431   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
432   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
433   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
434   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
435 
436   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
437 
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
443 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
444 {
445   int        ierr,flops=0;
446   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
447   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
448   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
449   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
450   int        am=A->M,cn=C->N;
451   int        i,j,k,anzi,bnzi,cnzi,brow;
452   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
453 
454   PetscFunctionBegin;
455 
456   /* Start timers */
457   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
458 
459   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
460   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
461   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
462   /* Traverse A row-wise. */
463   /* Build the ith row in C by summing over nonzero columns in A, */
464   /* the rows of B corresponding to nonzeros of A. */
465   for (i=0;i<am;i++) {
466     anzi = ai[i+1] - ai[i];
467     for (j=0;j<anzi;j++) {
468       brow = *aj++;
469       bnzi = bi[brow+1] - bi[brow];
470       bjj  = bj + bi[brow];
471       baj  = ba + bi[brow];
472       for (k=0;k<bnzi;k++) {
473         temp[bjj[k]] += (*aa)*baj[k];
474       }
475       flops += 2*bnzi;
476       aa++;
477     }
478     /* Store row back into C, and re-zero temp */
479     cnzi = ci[i+1] - ci[i];
480     for (j=0;j<cnzi;j++) {
481       ca[j] = temp[cj[j]];
482       temp[cj[j]] = 0.0;
483     }
484     ca += cnzi;
485     cj += cnzi;
486   }
487   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
488   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
489 
490   /* Free temp */
491   ierr = PetscFree(temp);CHKERRQ(ierr);
492   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
493   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
499 int RegisterMatMatMultRoutines_Private(Mat A) {
500   int ierr;
501 
502   PetscFunctionBegin;
503 #ifdef OLD
504   if (!logkey_matmatmult) {
505     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
506   }
507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij",
508                                            "MatMatMult_SeqAIJ_SeqAIJ",
509                                            MatMatMult_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
510 #endif
511   if (!logkey_matmatmult_symbolic) {
512     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
513   }
514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
515                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
516                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
517   if (!logkey_matmatmult_numeric) {
518     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
519   }
520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
521                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
522                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525