xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 901853e05a144ee1b7dab90d28db377fef86bcf8)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatMatMult"
13 /*@
14    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the left matrix
20 .  B - the right matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of AIJ matrices and classes
31    which inherit from AIJ.  C will be of type MATAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
36 @*/
37 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
41   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
50   PetscValidType(B,2);
51   MatPreallocated(B);
52   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
56 
57   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
58 
59   /* For now, we do not dispatch based on the type of A and B */
60   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
61   fA = A->ops->matmult;
62   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
63   fB = B->ops->matmult;
64   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
65   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
66 
67   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
68   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
69   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
70 
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
76 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   if (scall == MAT_INITIAL_MATRIX){
82     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
83   } else if (scall == MAT_REUSE_MATRIX){
84     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
85   } else {
86     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
93 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
94   PetscErrorCode ierr;
95 
96   PetscFunctionBegin;
97   if (scall == MAT_INITIAL_MATRIX){
98     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
99   }
100   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatMatMultSymbolic"
106 /*@
107    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
108    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
109 
110    Collective on Mat
111 
112    Input Parameters:
113 +  A - the left matrix
114 .  B - the right matrix
115 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
116 
117    Output Parameters:
118 .  C - the matrix containing the ij structure of product matrix
119 
120    Notes:
121    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
122 
123    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
124 
125    Level: intermediate
126 
127 .seealso: MatMatMult(),MatMatMultNumeric()
128 @*/
129 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
130   PetscErrorCode ierr;
131   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
132   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
133 
134   PetscFunctionBegin;
135   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
136   PetscValidType(A,1);
137   MatPreallocated(A);
138   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
139   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
140 
141   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
142   PetscValidType(B,2);
143   MatPreallocated(B);
144   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
145   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
146   PetscValidPointer(C,3);
147 
148   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
149   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
150 
151   /* For now, we do not dispatch based on the type of A and P */
152   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
153   Asymbolic = A->ops->matmultsymbolic;
154   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
155   Bsymbolic = B->ops->matmultsymbolic;
156   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
157   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
158 
159   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
160   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
161   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
162 
163   PetscFunctionReturn(0);
164 }
165 
166 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
167 #undef __FUNCT__
168 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
169 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
170 {
171   PetscErrorCode ierr;
172   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr;
173 
174   PetscFunctionBegin;
175   ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
176   ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
177   ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
178   ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr);
179   ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr);
180   /*
181   ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr);
182   ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr);
183   */
184   ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
185   ierr = PetscFree(mult);CHKERRQ(ierr);
186 
187   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
188 
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
194 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
195 {
196   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
197   PetscErrorCode    ierr;
198   int               *idx,i,start,end,ncols,nzA,nzB,*cmap;
199   Mat_MatMatMultMPI *mult;
200 
201   PetscFunctionBegin;
202   if (a->cstart != b->rstart || a->cend != b->rend){
203     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
204   }
205   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
206 
207   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
208   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr);
209 
210   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
211   start = a->rstart; end = a->rend;
212   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
213   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr);
214 
215   /* compute C_seq = A_seq * B_seq */
216   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
217 
218   /* create mpi matrix C by concatinating C_seq */
219   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
220   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
221 
222   /* attach the supporting struct to C for reuse of symbolic C */
223   (*C)->spptr         = (void*)mult;
224   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
225 
226   PetscFunctionReturn(0);
227 }
228 
229 #undef __FUNCT__
230 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
231 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
232 {
233   PetscErrorCode ierr;
234   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
235   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
236   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
237   int            am=A->M,bn=B->N,bm=B->M;
238   int            i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
239   MatScalar      *ca;
240   PetscBT        lnkbt;
241 
242   PetscFunctionBegin;
243   /* Set up */
244   /* Allocate ci array, arrays for fill computation and */
245   /* free space for accumulating nonzero column info */
246   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
247   ci[0] = 0;
248 
249   /* create and initialize a linked list */
250   nlnk = bn+1;
251   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
252 
253   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
254   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
255   current_space = free_space;
256 
257   /* Determine symbolic info for each row of the product: */
258   for (i=0;i<am;i++) {
259     anzi = ai[i+1] - ai[i];
260     cnzi = 0;
261     j    = anzi;
262     aj   = a->j + ai[i];
263     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
264       j--;
265       brow = *(aj + j);
266       bnzj = bi[brow+1] - bi[brow];
267       bjj  = bj + bi[brow];
268       /* add non-zero cols of B into the sorted linked list lnk */
269       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
270       cnzi += nlnk;
271     }
272 
273     /* If free space is not available, make more free space */
274     /* Double the amount of total space in the list */
275     if (current_space->local_remaining<cnzi) {
276       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
277       nspacedouble++;
278     }
279 
280     /* Copy data into free space, then initialize lnk */
281     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
282     current_space->array           += cnzi;
283     current_space->local_used      += cnzi;
284     current_space->local_remaining -= cnzi;
285 
286     ci[i+1] = ci[i] + cnzi;
287   }
288 
289   /* Column indices are in the list of free space */
290   /* Allocate space for cj, initialize cj, and */
291   /* destroy list of free space and other temporary array(s) */
292   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
293   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
294   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
295 
296   /* Allocate space for ca */
297   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
298   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
299 
300   /* put together the new symbolic matrix */
301   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
302 
303   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
304   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
305   c = (Mat_SeqAIJ *)((*C)->data);
306   c->freedata = PETSC_TRUE;
307   c->nonew    = 0;
308 
309   if (nspacedouble){
310     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatMatMultNumeric"
317 /*@
318    MatMatMultNumeric - Performs the numeric matrix-matrix product.
319    Call this routine after first calling MatMatMultSymbolic().
320 
321    Collective on Mat
322 
323    Input Parameters:
324 +  A - the left matrix
325 -  B - the right matrix
326 
327    Output Parameters:
328 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
329 
330    Notes:
331    C must have been created with MatMatMultSymbolic.
332 
333    This routine is currently only implemented for SeqAIJ type matrices.
334 
335    Level: intermediate
336 
337 .seealso: MatMatMult(),MatMatMultSymbolic()
338 @*/
339 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){
340   PetscErrorCode ierr;
341   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
342   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
343 
344   PetscFunctionBegin;
345 
346   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
347   PetscValidType(A,1);
348   MatPreallocated(A);
349   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
350   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
351 
352   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
353   PetscValidType(B,2);
354   MatPreallocated(B);
355   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
356   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
357 
358   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
359   PetscValidType(C,3);
360   MatPreallocated(C);
361   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
362   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
363 
364   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
365   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
366   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
367 
368   /* For now, we do not dispatch based on the type of A and B */
369   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
370   Anumeric = A->ops->matmultnumeric;
371   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
372   Bnumeric = B->ops->matmultnumeric;
373   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
374   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
375 
376   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
377   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
378   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
379 
380   PetscFunctionReturn(0);
381 }
382 
383 /* This routine is called ONLY in the case of reusing previously computed symbolic C */
384 #undef __FUNCT__
385 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
386 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
387 {
388   PetscErrorCode ierr;
389   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr;
390   Mat               *seq;
391 
392   PetscFunctionBegin;
393   seq = &mult->B_seq;
394   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
395   mult->B_seq = *seq;
396 
397   seq = &mult->A_loc;
398   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
399   mult->A_loc = *seq;
400 
401   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
402 
403   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
404   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
405 
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
411 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
412 {
413   PetscErrorCode ierr;
414   int        flops=0;
415   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
416   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
417   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
418   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
419   int        am=A->M,cn=C->N;
420   int        i,j,k,anzi,bnzi,cnzi,brow;
421   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
422 
423   PetscFunctionBegin;
424 
425   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
426   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
427   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
428   /* Traverse A row-wise. */
429   /* Build the ith row in C by summing over nonzero columns in A, */
430   /* the rows of B corresponding to nonzeros of A. */
431   for (i=0;i<am;i++) {
432     anzi = ai[i+1] - ai[i];
433     for (j=0;j<anzi;j++) {
434       brow = *aj++;
435       bnzi = bi[brow+1] - bi[brow];
436       bjj  = bj + bi[brow];
437       baj  = ba + bi[brow];
438       for (k=0;k<bnzi;k++) {
439         temp[bjj[k]] += (*aa)*baj[k];
440       }
441       flops += 2*bnzi;
442       aa++;
443     }
444     /* Store row back into C, and re-zero temp */
445     cnzi = ci[i+1] - ci[i];
446     for (j=0;j<cnzi;j++) {
447       ca[j] = temp[cj[j]];
448       temp[cj[j]] = 0.0;
449     }
450     ca += cnzi;
451     cj += cnzi;
452   }
453   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
454   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
455 
456   /* Free temp */
457   ierr = PetscFree(temp);CHKERRQ(ierr);
458   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatMatMultTranspose"
464 /*@
465    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
466 
467    Collective on Mat
468 
469    Input Parameters:
470 +  A - the left matrix
471 .  B - the right matrix
472 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
473 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
474 
475    Output Parameters:
476 .  C - the product matrix
477 
478    Notes:
479    C will be created and must be destroyed by the user with MatDestroy().
480 
481    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
482    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
483 
484    Level: intermediate
485 
486 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
487 @*/
488 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
489 {
490   PetscErrorCode ierr;
491   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
492   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
493 
494   PetscFunctionBegin;
495   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
496   PetscValidType(A,1);
497   MatPreallocated(A);
498   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
499   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
500   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
501   PetscValidType(B,2);
502   MatPreallocated(B);
503   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
504   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
505   PetscValidPointer(C,3);
506   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
507 
508   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
509 
510   fA = A->ops->matmulttranspose;
511   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
512   fB = B->ops->matmulttranspose;
513   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
514   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
515 
516   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
517   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
518   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
519 
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
525 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
526   PetscErrorCode ierr;
527 
528   PetscFunctionBegin;
529   if (scall == MAT_INITIAL_MATRIX){
530     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
531   }
532   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 #undef __FUNCT__
537 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
538 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
539 {
540   PetscErrorCode ierr;
541   Mat            At;
542   int            *ati,*atj;
543 
544   PetscFunctionBegin;
545   /* create symbolic At */
546   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
547   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
548 
549   /* get symbolic C=At*B */
550   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
551 
552   /* clean up */
553   ierr = MatDestroy(At);CHKERRQ(ierr);
554   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
555 
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
561 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
562 {
563   PetscErrorCode ierr;
564   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
565   int            am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
566   int            cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
567   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
568 
569   PetscFunctionBegin;
570   /* clear old values in C */
571   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
572 
573   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
574   for (i=0;i<am;i++) {
575     bj   = b->j + bi[i];
576     ba   = b->a + bi[i];
577     bnzi = bi[i+1] - bi[i];
578     anzi = ai[i+1] - ai[i];
579     for (j=0; j<anzi; j++) {
580       nextb = 0;
581       crow  = *aj++;
582       cjj   = cj + ci[crow];
583       caj   = ca + ci[crow];
584       /* perform sparse axpy operation.  Note cjj includes bj. */
585       for (k=0; nextb<bnzi; k++) {
586         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
587           caj[k] += (*aa)*(*(ba+nextb));
588           nextb++;
589         }
590       }
591       flops += 2*bnzi;
592       aa++;
593     }
594   }
595 
596   /* Assemble the final matrix and clean up */
597   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
598   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
599   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602