xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 4ce68768e41e382c1fc596e135fd299def09fddc)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 typedef struct { /* used by MatMatMult_MPIAIJ_MPIAIJ for reusing symbolic mat product */
11   IS     isrowa,isrowb,iscolb;
12   Mat    *aseq,*bseq,C_seq;
13 } Mat_MatMatMultMPI;
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMatMult"
17 /*@
18    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
19 
20    Collective on Mat
21 
22    Input Parameters:
23 +  A - the left matrix
24 .  B - the right matrix
25 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
26 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
27 
28    Output Parameters:
29 .  C - the product matrix
30 
31    Notes:
32    C will be created and must be destroyed by the user with MatDestroy().
33 
34    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
35    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
36 
37    Level: intermediate
38 
39 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
40 @*/
41 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
42 {
43   PetscErrorCode ierr;
44   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
45   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
46 
47   PetscFunctionBegin;
48   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
49   PetscValidType(A,1);
50   MatPreallocated(A);
51   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
52   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
53   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
54   PetscValidType(B,2);
55   MatPreallocated(B);
56   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
57   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
58   PetscValidPointer(C,3);
59   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
60 
61   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
62 
63   /* For now, we do not dispatch based on the type of A and B */
64   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
65   fA = A->ops->matmult;
66   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
67   fB = B->ops->matmult;
68   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
69   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
70 
71   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
72   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
73   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
74 
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
80 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
81 {
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   if (scall == MAT_INITIAL_MATRIX){
86     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
87   } else if (scall == MAT_REUSE_MATRIX){
88     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
89   } else {
90     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
97 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
98   PetscErrorCode ierr;
99 
100   PetscFunctionBegin;
101   if (scall == MAT_INITIAL_MATRIX){
102     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
103   }
104   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatMatMultSymbolic"
110 /*@
111    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
112    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
113 
114    Collective on Mat
115 
116    Input Parameters:
117 +  A - the left matrix
118 .  B - the right matrix
119 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
120 
121    Output Parameters:
122 .  C - the matrix containing the ij structure of product matrix
123 
124    Notes:
125    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
126 
127    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
128 
129    Level: intermediate
130 
131 .seealso: MatMatMult(),MatMatMultNumeric()
132 @*/
133 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
134   PetscErrorCode ierr;
135   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
136   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
137 
138   PetscFunctionBegin;
139   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
140   PetscValidType(A,1);
141   MatPreallocated(A);
142   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
143   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
144 
145   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
146   PetscValidType(B,2);
147   MatPreallocated(B);
148   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
149   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
150   PetscValidPointer(C,3);
151 
152   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
153   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
154 
155   /* For now, we do not dispatch based on the type of A and P */
156   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
157   Asymbolic = A->ops->matmultsymbolic;
158   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
159   Bsymbolic = B->ops->matmultsymbolic;
160   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
161   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
162 
163   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
164   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
165   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
166 
167   PetscFunctionReturn(0);
168 }
169 
170 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
171 #undef __FUNCT__
172 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
173 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
174 {
175   PetscErrorCode ierr;
176   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr;
177 
178   PetscFunctionBegin;
179   ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
180   ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
181   ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
182   ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr);
183   ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr);
184   ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
185   ierr = PetscFree(mult);CHKERRQ(ierr);
186 
187   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
188 
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
194 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
195 {
196   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
197   PetscErrorCode ierr;
198   int               *idx,i,start,end,ncols,imark,nzA,nzB,*cmap;
199   Mat_MatMatMultMPI *mult;
200 
201   PetscFunctionBegin;
202   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
203 
204   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
205   start = a->cstart;
206   cmap  = a->garray;
207   nzA   = a->A->n;
208   nzB   = a->B->n;
209   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
210   ncols = 0;
211   for (i=0; i<nzB; i++) {
212     if (cmap[i] < start) idx[ncols++] = cmap[i];
213     else break;
214   }
215   imark = i;
216   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
217   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
218   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr);
219   ierr = PetscFree(idx);CHKERRQ(ierr);
220   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr);
221   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr)
222 
223   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
224   start = a->rstart; end = a->rend;
225   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
226   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr);
227 
228   /* compute C_seq = A_seq * B_seq */
229   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
230 
231   /* create mpi matrix C by concatinating C_seq */
232   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
233   ierr = MatMerge(A->comm,mult->C_seq,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
234 
235   /* attach the supporting struct to C for reuse of symbolic C */
236   (*C)->spptr         = (void*)mult;
237   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
238 
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
244 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
245 {
246   PetscErrorCode ierr;
247   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
248   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
249   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
250   int            *ci,*cj,*lnk;
251   int            am=A->M,bn=B->N,bm=B->M;
252   int            i,j,anzi,brow,bnzj,cnzi,nlnk,nspacedouble=0;
253   MatScalar      *ca;
254 
255   PetscFunctionBegin;
256   /* Set up */
257   /* Allocate ci array, arrays for fill computation and */
258   /* free space for accumulating nonzero column info */
259   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
260   ci[0] = 0;
261 
262   /* create and initialize a linked list for symbolic product */
263   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
264   ierr = PetscLLInitialize(bn,bn);CHKERRQ(ierr);
265 
266   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
267   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
268   current_space = free_space;
269 
270   /* Determine symbolic info for each row of the product: */
271   for (i=0;i<am;i++) {
272     anzi = ai[i+1] - ai[i];
273     cnzi = 0;
274     j    = anzi;
275     aj   = a->j + ai[i];
276     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
277       j--;
278       brow = *(aj + j);
279       bnzj = bi[brow+1] - bi[brow];
280       bjj  = bj + bi[brow];
281       /* add non-zero cols of B into the sorted linked list lnk */
282       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk);CHKERRQ(ierr);
283       cnzi += nlnk;
284     }
285 
286     /* If free space is not available, make more free space */
287     /* Double the amount of total space in the list */
288     if (current_space->local_remaining<cnzi) {
289       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
290       nspacedouble++;
291     }
292 
293     /* Copy data into free space, then initialize lnk */
294     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array);CHKERRQ(ierr);
295     current_space->array           += cnzi;
296     current_space->local_used      += cnzi;
297     current_space->local_remaining -= cnzi;
298 
299     ci[i+1] = ci[i] + cnzi;
300   }
301 
302   /* Column indices are in the list of free space */
303   /* Allocate space for cj, initialize cj, and */
304   /* destroy list of free space and other temporary array(s) */
305   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
306   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
307   ierr = PetscFree(lnk);CHKERRQ(ierr);
308 
309   /* Allocate space for ca */
310   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
311   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
312 
313   /* put together the new symbolic matrix */
314   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
315 
316   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
317   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
318   c = (Mat_SeqAIJ *)((*C)->data);
319   c->freedata = PETSC_TRUE;
320   c->nonew    = 0;
321 
322   PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble);
323   PetscFunctionReturn(0);
324 }
325 
326 #undef __FUNCT__
327 #define __FUNCT__ "MatMatMultNumeric"
328 /*@
329    MatMatMultNumeric - Performs the numeric matrix-matrix product.
330    Call this routine after first calling MatMatMultSymbolic().
331 
332    Collective on Mat
333 
334    Input Parameters:
335 +  A - the left matrix
336 -  B - the right matrix
337 
338    Output Parameters:
339 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
340 
341    Notes:
342    C must have been created with MatMatMultSymbolic.
343 
344    This routine is currently only implemented for SeqAIJ type matrices.
345 
346    Level: intermediate
347 
348 .seealso: MatMatMult(),MatMatMultSymbolic()
349 @*/
350 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){
351   PetscErrorCode ierr;
352   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
353   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
354 
355   PetscFunctionBegin;
356 
357   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
358   PetscValidType(A,1);
359   MatPreallocated(A);
360   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
361   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
362 
363   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
364   PetscValidType(B,2);
365   MatPreallocated(B);
366   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
367   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
368 
369   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
370   PetscValidType(C,3);
371   MatPreallocated(C);
372   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
373   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
374 
375   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
376   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
377   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
378 
379   /* For now, we do not dispatch based on the type of A and B */
380   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
381   Anumeric = A->ops->matmultnumeric;
382   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
383   Bnumeric = B->ops->matmultnumeric;
384   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
385   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
386 
387   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
388   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
389   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
390 
391   PetscFunctionReturn(0);
392 }
393 
394 /* This routine is called ONLY in the case of reusing previously computed symbolic C */
395 #undef __FUNCT__
396 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
397 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
398 {
399   PetscErrorCode ierr;
400   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr;
401 
402   PetscFunctionBegin;
403   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr)
404   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr);
405   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
406 
407   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
408   ierr = MatMerge(A->comm,mult->C_seq,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
409 
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
415 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
416 {
417   PetscErrorCode ierr;
418   int        flops=0;
419   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
420   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
421   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
422   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
423   int        am=A->M,cn=C->N;
424   int        i,j,k,anzi,bnzi,cnzi,brow;
425   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
426 
427   PetscFunctionBegin;
428 
429   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
430   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
431   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
432   /* Traverse A row-wise. */
433   /* Build the ith row in C by summing over nonzero columns in A, */
434   /* the rows of B corresponding to nonzeros of A. */
435   for (i=0;i<am;i++) {
436     anzi = ai[i+1] - ai[i];
437     for (j=0;j<anzi;j++) {
438       brow = *aj++;
439       bnzi = bi[brow+1] - bi[brow];
440       bjj  = bj + bi[brow];
441       baj  = ba + bi[brow];
442       for (k=0;k<bnzi;k++) {
443         temp[bjj[k]] += (*aa)*baj[k];
444       }
445       flops += 2*bnzi;
446       aa++;
447     }
448     /* Store row back into C, and re-zero temp */
449     cnzi = ci[i+1] - ci[i];
450     for (j=0;j<cnzi;j++) {
451       ca[j] = temp[cj[j]];
452       temp[cj[j]] = 0.0;
453     }
454     ca += cnzi;
455     cj += cnzi;
456   }
457   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459 
460   /* Free temp */
461   ierr = PetscFree(temp);CHKERRQ(ierr);
462   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465