xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 634064b45b5c838063ae82f97ffb7e99245dcdb5)
1 /*
2   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 /*
11   Add a index set into a sorted linked list
12   input:
13     nidx      - number of input indices
14     indices   - interger array
15     idx_head  - the header of the list
16     idx_unset - the value indicating the entry in the list is not set yet
17     lnk       - linked list(an integer array) that is created
18   output:
19     nlnk      - number of newly added indices
20     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
21  */
22 #undef __FUNCT__
23 #define __FUNCT__ "LnklistAdd"
24 int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk)
25 {
26   int i,idx,lidx,entry,n;
27 
28   PetscFunctionBegin;
29   n    = 0;
30   lidx = idx_head;
31   i    = nidx;
32   while (i){
33     /* assume indices are almost in increasing order, starting from its end saves computation */
34     entry = indices[--i];
35     if (lnk[entry] == idx_unset) { /* new entry */
36       do {
37         idx = lidx;
38         lidx  = lnk[idx];
39       } while (entry > lidx);
40       lnk[idx] = entry;
41       lnk[entry] = lidx;
42       n++;
43     }
44   }
45   *nlnk = n;
46   PetscFunctionReturn(0);
47 }
48 
49 
50 static int logkey_matmatmult          = 0;
51 static int logkey_matmatmult_symbolic = 0;
52 static int logkey_matmatmult_numeric  = 0;
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatMatMult"
56 /*@
57    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
58 
59    Collective on Mat
60 
61    Input Parameters:
62 +  A - the left matrix
63 .  B - the right matrix
64 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
65 -  fill - expected fill as ratio of nonzeros in product matrix/nonzeros in original matrix
66 
67    Output Parameters:
68 .  C - the product matrix
69 
70    Notes:
71    C will be created and must be destroyed by the user with MatDestroy().
72 
73    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
74    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
75 
76    Level: intermediate
77 
78 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
79 @*/
80 int MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
81 {
82   int  ierr;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
86   PetscValidType(A,1);
87   MatPreallocated(A);
88   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
89   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
90 
91   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
92   PetscValidType(B,2);
93   MatPreallocated(B);
94   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
95   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
96 
97   PetscValidPointer(C,3);
98 
99   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
100 
101   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
102   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
103   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
104 
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
110 int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
111 {
112   Mat           *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,C_seq,C_mpi;
113   Mat_MPIAIJ    *a = (Mat_MPIAIJ*)A->data;
114   Mat_SeqAIJ    *c;
115   MatScalar     *ca;
116   int           ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap,*ci,*cj,grow,*d_nnz,*o_nnz;
117   IS            isrow,iscol;
118 
119   PetscFunctionBegin;
120   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
121   start = a->cstart;
122   cmap  = a->garray;
123   nzA   = a->A->n;
124   nzB   = a->B->n;
125   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
126   ncols = 0;
127   for (i=0; i<nzB; i++) {
128     if (cmap[i] < start) idx[ncols++] = cmap[i];
129     else break;
130   }
131   imark = i;
132   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
133   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
134   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */
135   ierr = PetscFree(idx);CHKERRQ(ierr);
136   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr);
137   ierr = MatGetSubMatrices(B,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&bseq);CHKERRQ(ierr);/* reuse */
138   ierr = ISDestroy(iscol);CHKERRQ(ierr);
139   B_seq = bseq[0];
140 
141   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
142   start = a->rstart; end = a->rend;
143   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */
144   ierr = MatGetSubMatrices(A,1,&iscol,&isrow,MAT_INITIAL_MATRIX,&aseq);CHKERRQ(ierr); /* reuse */
145   ierr = ISDestroy(isrow);CHKERRQ(ierr);
146   ierr = ISDestroy(iscol);CHKERRQ(ierr);
147   A_seq = aseq[0];
148 
149   /* compute C_seq = A_seq * B_seq */
150   ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq, B_seq, scall, fill,&C_seq);CHKERRQ(ierr);
151   /*
152   int rank;
153   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
154   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n);
155   */
156   ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr);
157   ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr);
158 
159   /* create a mpi matrix C_mpi that has C_seq as its local entries */
160   ierr = MatCreate(A->comm,C_seq->m,PETSC_DECIDE,PETSC_DECIDE,C_seq->N,&C_mpi);CHKERRQ(ierr);
161   ierr = MatSetType(C_mpi,MATMPIAIJ);CHKERRQ(ierr);
162 
163   c  = (Mat_SeqAIJ*)C_seq->data;
164   ci = c->i; cj = c->j; ca = c->a;
165   ierr = PetscMalloc((2*C_seq->m+1)*sizeof(int),&d_nnz);CHKERRQ(ierr);
166   o_nnz = d_nnz + C_seq->m;
167   nzA   = end-start;    /* max nonezero cols of the local diagonal part of C_mpi */
168   nzB   = C_seq->n-nzA; /* max nonezero cols of the local off-diagonal part of C_mpi */
169   for (i=0; i< C_seq->m; i++){
170     ncols = ci[i+1] - ci[i];
171     d_nnz[i] = PetscMin(ncols,nzA);
172     o_nnz[i] = PetscMin(ncols,nzB);
173   }
174   ierr = MatMPIAIJSetPreallocation(C_mpi,PETSC_DECIDE,d_nnz,PETSC_DECIDE,o_nnz);CHKERRQ(ierr);
175   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
176 
177   /* set row values of C_mpi */
178   for (i=0; i< C_seq->m; i++){
179     grow  = start + i;
180     ncols = ci[i+1] - ci[i];
181     ierr = MatSetValues_MPIAIJ(C_mpi,1,&grow,ncols,cj+ci[i],ca+ci[i],INSERT_VALUES);CHKERRQ(ierr);
182   }
183   ierr = MatAssemblyBegin(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
184   ierr = MatAssemblyEnd(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
185   ierr = MatDestroy(C_seq);CHKERRQ(ierr);
186 
187   *C = C_mpi;
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
193 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
194   int ierr;
195   char symfunct[80],numfunct[80];
196   int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat);
197 
198   PetscFunctionBegin;
199   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
200   /* When other implementations exist, attack the multiple dispatch problem. */
201   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
202   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
203   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
204   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
205   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
206 
207   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
208   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
209   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
210   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
211   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
212 
213   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
214   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
215   ierr = (*numeric)(A,B,*C);CHKERRQ(ierr);
216   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
217   PetscFunctionReturn(0);
218 }
219 
220 #undef __FUNCT__
221 #define __FUNCT__ "MatMatMultSymbolic"
222 /*@
223    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
224    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
225 
226    Collective on Mat
227 
228    Input Parameters:
229 +  A - the left matrix
230 -  B - the right matrix
231 
232    Output Parameters:
233 .  C - the matrix containing the ij structure of product matrix
234 
235    Notes:
236    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
237 
238    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
239 
240    Level: intermediate
241 
242 .seealso: MatMatMult(),MatMatMultNumeric()
243 @*/
244 int MatMatMultSymbolic(Mat A,Mat B,Mat *C) {
245   /* Perhaps this "interface" routine should be moved into the interface directory.*/
246   /* To facilitate implementations with varying types, QueryFunction is used.*/
247   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
248   int  ierr;
249   char symfunct[80];
250   int  (*symbolic)(Mat,Mat,Mat *);
251 
252   PetscFunctionBegin;
253   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
254   PetscValidType(A,1);
255   MatPreallocated(A);
256   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
257   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
258 
259   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
260   PetscValidType(B,2);
261   MatPreallocated(B);
262   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
263   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
264   PetscValidPointer(C,3);
265 
266 
267   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
268 
269   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
270   /* When other implementations exist, attack the multiple dispatch problem. */
271   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
272   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
273   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
274   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
275   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
276   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
277 
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
283 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
284 {
285   int            ierr;
286   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
287   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
288   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
289   int            *ci,*cj,*lnk,idx0,idx;
290   int            am=A->M,bn=B->N,bm=B->M;
291   int            i,j,anzi,brow,bnzj,cnzi,nlnk;
292   MatScalar      *ca;
293 
294   PetscFunctionBegin;
295   /* Start timers */
296   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
297   /* Set up */
298   /* Allocate ci array, arrays for fill computation and */
299   /* free space for accumulating nonzero column info */
300   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
301   ci[0] = 0;
302 
303   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
304   for (i=0; i<bn; i++) lnk[i] = -1;
305 
306   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
307   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
308   current_space = free_space;
309 
310   /* Determine symbolic info for each row of the product: */
311   for (i=0;i<am;i++) {
312     anzi = ai[i+1] - ai[i];
313     cnzi = 0;
314     lnk[bn] = bn;
315     j       = anzi;
316     aj      = a->j + ai[i];
317     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
318       j--;
319       brow = *(aj + j);
320       bnzj = bi[brow+1] - bi[brow];
321       bjj  = bj + bi[brow];
322       /* add non-zero cols of B into the sorted linked list lnk */
323       ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr);
324       cnzi += nlnk;
325     }
326 
327     /* If free space is not available, make more free space */
328     /* Double the amount of total space in the list */
329     if (current_space->local_remaining<cnzi) {
330       printf("MatMatMult_Symbolic_SeqAIJ_SeqAIJ()...%d -th row, double space ...\n",i);
331       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
332     }
333 
334     /* Copy data into free space, and zero out denserow and lnk */
335     idx = bn;
336     for (j=0; j<cnzi; j++){
337       idx0 = idx;
338       idx  = lnk[idx0];
339       *current_space->array++ = idx;
340       lnk[idx0] = -1;
341     }
342     lnk[idx] = -1;
343 
344     current_space->local_used      += cnzi;
345     current_space->local_remaining -= cnzi;
346 
347     ci[i+1] = ci[i] + cnzi;
348   }
349 
350   /* Column indices are in the list of free space */
351   /* Allocate space for cj, initialize cj, and */
352   /* destroy list of free space and other temporary array(s) */
353   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
354   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
355   ierr = PetscFree(lnk);CHKERRQ(ierr);
356 
357   /* Allocate space for ca */
358   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
359   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
360 
361   /* put together the new matrix */
362   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
363 
364   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
365   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
366   c = (Mat_SeqAIJ *)((*C)->data);
367   c->freedata = PETSC_TRUE;
368   c->nonew    = 0;
369 
370   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
371   PetscFunctionReturn(0);
372 }
373 
374 #undef __FUNCT__
375 #define __FUNCT__ "MatMatMultNumeric"
376 /*@
377    MatMatMultNumeric - Performs the numeric matrix-matrix product.
378    Call this routine after first calling MatMatMultSymbolic().
379 
380    Collective on Mat
381 
382    Input Parameters:
383 +  A - the left matrix
384 -  B - the right matrix
385 
386    Output Parameters:
387 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
388 
389    Notes:
390    C must have been created with MatMatMultSymbolic.
391 
392    This routine is currently only implemented for SeqAIJ type matrices.
393 
394    Level: intermediate
395 
396 .seealso: MatMatMult(),MatMatMultSymbolic()
397 @*/
398 int MatMatMultNumeric(Mat A,Mat B,Mat C){
399   /* Perhaps this "interface" routine should be moved into the interface directory.*/
400   /* To facilitate implementations with varying types, QueryFunction is used.*/
401   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
402   int ierr;
403   char numfunct[80];
404   int (*numeric)(Mat,Mat,Mat);
405 
406   PetscFunctionBegin;
407 
408   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
409   PetscValidType(A,1);
410   MatPreallocated(A);
411   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
412   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
413 
414   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
415   PetscValidType(B,2);
416   MatPreallocated(B);
417   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
418   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
419 
420   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
421   PetscValidType(C,3);
422   MatPreallocated(C);
423   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
424   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
425 
426   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
427   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
428   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
429 
430   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
431   /* When other implementations exist, attack the multiple dispatch problem. */
432   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
433   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
434   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
435   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
436   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
437 
438   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
439 
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
445 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
446 {
447   int        ierr,flops=0;
448   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
449   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
450   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
451   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
452   int        am=A->M,cn=C->N;
453   int        i,j,k,anzi,bnzi,cnzi,brow;
454   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
455 
456   PetscFunctionBegin;
457 
458   /* Start timers */
459   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
460 
461   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
462   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
463   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
464   /* Traverse A row-wise. */
465   /* Build the ith row in C by summing over nonzero columns in A, */
466   /* the rows of B corresponding to nonzeros of A. */
467   for (i=0;i<am;i++) {
468     anzi = ai[i+1] - ai[i];
469     for (j=0;j<anzi;j++) {
470       brow = *aj++;
471       bnzi = bi[brow+1] - bi[brow];
472       bjj  = bj + bi[brow];
473       baj  = ba + bi[brow];
474       for (k=0;k<bnzi;k++) {
475         temp[bjj[k]] += (*aa)*baj[k];
476       }
477       flops += 2*bnzi;
478       aa++;
479     }
480     /* Store row back into C, and re-zero temp */
481     cnzi = ci[i+1] - ci[i];
482     for (j=0;j<cnzi;j++) {
483       ca[j] = temp[cj[j]];
484       temp[cj[j]] = 0.0;
485     }
486     ca += cnzi;
487     cj += cnzi;
488   }
489   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
490   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
491 
492   /* Free temp */
493   ierr = PetscFree(temp);CHKERRQ(ierr);
494   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
495   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
501 int RegisterMatMatMultRoutines_Private(Mat A) {
502   int ierr;
503 
504   PetscFunctionBegin;
505 #ifndef OLD
506   if (!logkey_matmatmult) {
507     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
508   }
509 #endif
510   if (!logkey_matmatmult_symbolic) {
511     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
512   }
513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
514                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
515                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
516   if (!logkey_matmatmult_numeric) {
517     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
518   }
519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
520                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
521                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524