xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 4c8c32f384357201972f9dd67b0f2e901704fd35)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatMatMult"
12 /*@
13    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
14 
15    Collective on Mat
16 
17    Input Parameters:
18 +  A - the left matrix
19 .  B - the right matrix
20 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
22 
23    Output Parameters:
24 .  C - the product matrix
25 
26    Notes:
27    C will be created and must be destroyed by the user with MatDestroy().
28 
29    This routine is currently only implemented for pairs of AIJ matrices and classes
30    which inherit from AIJ.  C will be of type MATAIJ.
31 
32    Level: intermediate
33 
34 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
35 @*/
36 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
37 {
38   PetscErrorCode ierr;
39   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
40   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
41 
42   PetscFunctionBegin;
43   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
44   PetscValidType(A,1);
45   MatPreallocated(A);
46   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
47   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
48   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
49   PetscValidType(B,2);
50   MatPreallocated(B);
51   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
52   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
53   PetscValidPointer(C,3);
54   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   /* For now, we do not dispatch based on the type of A and B */
59   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60   fA = A->ops->matmult;
61   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
62   fB = B->ops->matmult;
63   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
64   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
65 
66   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
67   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
68   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
69 
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
75 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
76 {
77   PetscErrorCode ierr;
78 
79   PetscFunctionBegin;
80   if (scall == MAT_INITIAL_MATRIX){
81     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
82   } else if (scall == MAT_REUSE_MATRIX){
83     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
84   } else {
85     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
92 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
93   PetscErrorCode ierr;
94 
95   PetscFunctionBegin;
96   if (scall == MAT_INITIAL_MATRIX){
97     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
98   }
99   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatMatMultSymbolic"
105 /*@
106    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
107    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
108 
109    Collective on Mat
110 
111    Input Parameters:
112 +  A - the left matrix
113 .  B - the right matrix
114 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
115 
116    Output Parameters:
117 .  C - the matrix containing the ij structure of product matrix
118 
119    Notes:
120    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
121 
122    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
123 
124    Level: intermediate
125 
126 .seealso: MatMatMult(),MatMatMultNumeric()
127 @*/
128 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
129   PetscErrorCode ierr;
130   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
131   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
132 
133   PetscFunctionBegin;
134   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
135   PetscValidType(A,1);
136   MatPreallocated(A);
137   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
138   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
139 
140   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
141   PetscValidType(B,2);
142   MatPreallocated(B);
143   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
144   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
145   PetscValidPointer(C,3);
146 
147   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
148   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
149 
150   /* For now, we do not dispatch based on the type of A and P */
151   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
152   Asymbolic = A->ops->matmultsymbolic;
153   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
154   Bsymbolic = B->ops->matmultsymbolic;
155   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
156   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
157 
158   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
159   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
160   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
161 
162   PetscFunctionReturn(0);
163 }
164 
165 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
166 #undef __FUNCT__
167 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
168 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
169 {
170   PetscErrorCode ierr;
171   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr;
172 
173   PetscFunctionBegin;
174   ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
175   ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
176   ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
177   ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr);
178   ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr);
179   ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
180   ierr = PetscFree(mult);CHKERRQ(ierr);
181 
182   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
183 
184   PetscFunctionReturn(0);
185 }
186 
187 #undef __FUNCT__
188 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
189 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
190 {
191   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
192   PetscErrorCode    ierr;
193   int               *idx,i,start,end,ncols,nzA,nzB,*cmap;
194   Mat_MatMatMultMPI *mult;
195 
196   PetscFunctionBegin;
197   if (a->cstart != b->rstart || a->cend != b->rend){
198     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
199   }
200   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
201 
202   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
203   start = a->cstart;
204   cmap  = a->garray;
205   nzA   = a->A->n;
206   nzB   = a->B->n;
207   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
208   ncols = 0;
209   for (i=0; i<nzB; i++) {
210     if (cmap[i] < start) idx[ncols++] = cmap[i];
211     else break;
212   }
213   mult->brstart = i;
214   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
215   for (i=mult->brstart; i<nzB; i++) idx[ncols++] = cmap[i];
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr);
217   ierr = PetscFree(idx);CHKERRQ(ierr);
218   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr);
219   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr);
220 
221   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
222   start = a->rstart; end = a->rend;
223   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
224   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr);
225 
226   /* compute C_seq = A_seq * B_seq */
227   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
228 
229   /* create mpi matrix C by concatinating C_seq */
230   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
231   ierr = MatMerge(A->comm,mult->C_seq,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
232 
233   /* attach the supporting struct to C for reuse of symbolic C */
234   (*C)->spptr         = (void*)mult;
235   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
236 
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
242 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
243 {
244   PetscErrorCode ierr;
245   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
246   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
247   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
248   int            *ci,*cj,*lnk;
249   int            am=A->M,bn=B->N,bm=B->M;
250   int            i,j,anzi,brow,bnzj,cnzi,nlnk,nspacedouble=0;
251   MatScalar      *ca;
252 
253   PetscFunctionBegin;
254   /* Set up */
255   /* Allocate ci array, arrays for fill computation and */
256   /* free space for accumulating nonzero column info */
257   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
258   ci[0] = 0;
259 
260   /* create and initialize a linked list for symbolic product */
261   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
262   ierr = PetscLLInitialize(bn,bn);CHKERRQ(ierr);
263 
264   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
265   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
266   current_space = free_space;
267 
268   /* Determine symbolic info for each row of the product: */
269   for (i=0;i<am;i++) {
270     anzi = ai[i+1] - ai[i];
271     cnzi = 0;
272     j    = anzi;
273     aj   = a->j + ai[i];
274     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
275       j--;
276       brow = *(aj + j);
277       bnzj = bi[brow+1] - bi[brow];
278       bjj  = bj + bi[brow];
279       /* add non-zero cols of B into the sorted linked list lnk */
280       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk);CHKERRQ(ierr);
281       cnzi += nlnk;
282     }
283 
284     /* If free space is not available, make more free space */
285     /* Double the amount of total space in the list */
286     if (current_space->local_remaining<cnzi) {
287       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
288       nspacedouble++;
289     }
290 
291     /* Copy data into free space, then initialize lnk */
292     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array);CHKERRQ(ierr);
293     current_space->array           += cnzi;
294     current_space->local_used      += cnzi;
295     current_space->local_remaining -= cnzi;
296 
297     ci[i+1] = ci[i] + cnzi;
298   }
299 
300   /* Column indices are in the list of free space */
301   /* Allocate space for cj, initialize cj, and */
302   /* destroy list of free space and other temporary array(s) */
303   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
304   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
305   ierr = PetscFree(lnk);CHKERRQ(ierr);
306 
307   /* Allocate space for ca */
308   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
309   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
310 
311   /* put together the new symbolic matrix */
312   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
313 
314   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
315   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
316   c = (Mat_SeqAIJ *)((*C)->data);
317   c->freedata = PETSC_TRUE;
318   c->nonew    = 0;
319 
320   PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble);
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatMatMultNumeric"
326 /*@
327    MatMatMultNumeric - Performs the numeric matrix-matrix product.
328    Call this routine after first calling MatMatMultSymbolic().
329 
330    Collective on Mat
331 
332    Input Parameters:
333 +  A - the left matrix
334 -  B - the right matrix
335 
336    Output Parameters:
337 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
338 
339    Notes:
340    C must have been created with MatMatMultSymbolic.
341 
342    This routine is currently only implemented for SeqAIJ type matrices.
343 
344    Level: intermediate
345 
346 .seealso: MatMatMult(),MatMatMultSymbolic()
347 @*/
348 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){
349   PetscErrorCode ierr;
350   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
351   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
352 
353   PetscFunctionBegin;
354 
355   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
356   PetscValidType(A,1);
357   MatPreallocated(A);
358   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
359   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
360 
361   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
362   PetscValidType(B,2);
363   MatPreallocated(B);
364   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
365   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
366 
367   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
368   PetscValidType(C,3);
369   MatPreallocated(C);
370   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
371   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
372 
373   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
374   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
375   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
376 
377   /* For now, we do not dispatch based on the type of A and B */
378   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
379   Anumeric = A->ops->matmultnumeric;
380   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
381   Bnumeric = B->ops->matmultnumeric;
382   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
383   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
384 
385   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
386   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
387   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
388 
389   PetscFunctionReturn(0);
390 }
391 
392 /* This routine is called ONLY in the case of reusing previously computed symbolic C */
393 #undef __FUNCT__
394 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
395 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
396 {
397   PetscErrorCode ierr;
398   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr;
399 
400   PetscFunctionBegin;
401   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr)
402   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr);
403   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
404 
405   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
406   ierr = MatMerge(A->comm,mult->C_seq,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
407 
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
413 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
414 {
415   PetscErrorCode ierr;
416   int        flops=0;
417   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
418   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
419   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
420   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
421   int        am=A->M,cn=C->N;
422   int        i,j,k,anzi,bnzi,cnzi,brow;
423   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
424 
425   PetscFunctionBegin;
426 
427   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
428   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
429   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
430   /* Traverse A row-wise. */
431   /* Build the ith row in C by summing over nonzero columns in A, */
432   /* the rows of B corresponding to nonzeros of A. */
433   for (i=0;i<am;i++) {
434     anzi = ai[i+1] - ai[i];
435     for (j=0;j<anzi;j++) {
436       brow = *aj++;
437       bnzi = bi[brow+1] - bi[brow];
438       bjj  = bj + bi[brow];
439       baj  = ba + bi[brow];
440       for (k=0;k<bnzi;k++) {
441         temp[bjj[k]] += (*aa)*baj[k];
442       }
443       flops += 2*bnzi;
444       aa++;
445     }
446     /* Store row back into C, and re-zero temp */
447     cnzi = ci[i+1] - ci[i];
448     for (j=0;j<cnzi;j++) {
449       ca[j] = temp[cj[j]];
450       temp[cj[j]] = 0.0;
451     }
452     ca += cnzi;
453     cj += cnzi;
454   }
455   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457 
458   /* Free temp */
459   ierr = PetscFree(temp);CHKERRQ(ierr);
460   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatMatMultTranspose"
466 /*@
467    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
468 
469    Collective on Mat
470 
471    Input Parameters:
472 +  A - the left matrix
473 .  B - the right matrix
474 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
475 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
476 
477    Output Parameters:
478 .  C - the product matrix
479 
480    Notes:
481    C will be created and must be destroyed by the user with MatDestroy().
482 
483    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
484    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
485 
486    Level: intermediate
487 
488 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
489 @*/
490 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
491 {
492   PetscErrorCode ierr;
493   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
494   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
495 
496   PetscFunctionBegin;
497   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
498   PetscValidType(A,1);
499   MatPreallocated(A);
500   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
501   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
502   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
503   PetscValidType(B,2);
504   MatPreallocated(B);
505   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
506   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
507   PetscValidPointer(C,3);
508   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
509 
510   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
511 
512   fA = A->ops->matmulttranspose;
513   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
514   fB = B->ops->matmulttranspose;
515   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
516   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
517 
518   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
519   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
520   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
521 
522   PetscFunctionReturn(0);
523 }
524 
525 #undef __FUNCT__
526 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
527 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
528   PetscErrorCode ierr;
529 
530   PetscFunctionBegin;
531   if (scall == MAT_INITIAL_MATRIX){
532     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
533   }
534   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
535   PetscFunctionReturn(0);
536 }
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
540 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
541 {
542   PetscErrorCode ierr;
543   Mat            At;
544   int            *ati,*atj;
545 
546   PetscFunctionBegin;
547   /* create symbolic At */
548   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
549   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
550 
551   /* get symbolic C=At*B */
552   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
553 
554   /* clean up */
555   ierr = MatDestroy(At);CHKERRQ(ierr);
556   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
557 
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
563 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
564 {
565   PetscErrorCode ierr;
566   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
567   int            am=A->m,anzi,*ai=b->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
568   int            cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
569   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
570 
571   PetscFunctionBegin;
572   /* clear old values in C */
573   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
574 
575   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
576   for (i=0;i<am;i++) {
577     bj   = b->j + bi[i];
578     ba   = b->a + bi[i];
579     bnzi = bi[i+1] - bi[i];
580     anzi = ai[i+1] - ai[i];
581     for (j=0; j<anzi; j++) {
582       nextb = 0;
583       crow  = *aj++;
584       cjj   = cj + ci[crow];
585       caj   = ca + ci[crow];
586       /* perform sparse axpy operation.  Note cjj includes bj. */
587       for (k=0; nextb<bnzi; k++) {
588         /* bcol = *(bj+nextb); */
589         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
590           caj[k] += (*aa)*(*(ba+nextb));
591           nextb++;
592         }
593       }
594       flops += 2*bnzi;
595       aa++;
596     }
597   }
598 
599   /* Assemble the final matrix and clean up */
600   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
601   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
602   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605