xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 4bf112e79d3f87ddad6dbf7b300a6f889c1e7d34)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatMatMult"
13 /*@
14    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the left matrix
20 .  B - the right matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of AIJ matrices and classes
31    which inherit from AIJ.  C will be of type MATAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
36 @*/
37 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
41   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
50   PetscValidType(B,2);
51   MatPreallocated(B);
52   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
56 
57   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
58 
59   /* For now, we do not dispatch based on the type of A and B */
60   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
61   fA = A->ops->matmult;
62   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
63   fB = B->ops->matmult;
64   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
65   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
66 
67   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
68   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
69   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
70 
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
76 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   if (scall == MAT_INITIAL_MATRIX){
82     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
83   } else if (scall == MAT_REUSE_MATRIX){
84     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
85   } else {
86     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
93 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
94   PetscErrorCode ierr;
95 
96   PetscFunctionBegin;
97   if (scall == MAT_INITIAL_MATRIX){
98     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
99   }
100   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatMatMultSymbolic"
106 /*@
107    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
108    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
109 
110    Collective on Mat
111 
112    Input Parameters:
113 +  A - the left matrix
114 .  B - the right matrix
115 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
116 
117    Output Parameters:
118 .  C - the matrix containing the ij structure of product matrix
119 
120    Notes:
121    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
122 
123    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
124 
125    Level: intermediate
126 
127 .seealso: MatMatMult(),MatMatMultNumeric()
128 @*/
129 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
130   PetscErrorCode ierr;
131   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
132   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
133 
134   PetscFunctionBegin;
135   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
136   PetscValidType(A,1);
137   MatPreallocated(A);
138   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
139   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
140 
141   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
142   PetscValidType(B,2);
143   MatPreallocated(B);
144   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
145   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
146   PetscValidPointer(C,3);
147 
148   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
149   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
150 
151   /* For now, we do not dispatch based on the type of A and P */
152   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
153   Asymbolic = A->ops->matmultsymbolic;
154   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
155   Bsymbolic = B->ops->matmultsymbolic;
156   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
157   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
158 
159   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
160   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
161   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
162 
163   PetscFunctionReturn(0);
164 }
165 
166 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
167 #undef __FUNCT__
168 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
169 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
170 {
171   PetscErrorCode ierr;
172   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr;
173 
174   PetscFunctionBegin;
175   ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
176   ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
177   ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
178   ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr);
179   ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr);
180   ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
181   ierr = PetscFree(mult);CHKERRQ(ierr);
182 
183   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
184 
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
190 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
191 {
192   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
193   PetscErrorCode    ierr;
194   int               *idx,i,start,end,ncols,nzA,nzB,*cmap;
195   Mat_MatMatMultMPI *mult;
196 
197   PetscFunctionBegin;
198   if (a->cstart != b->rstart || a->cend != b->rend){
199     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
200   }
201   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
202 
203   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
204   start = a->cstart;
205   cmap  = a->garray;
206   nzA   = a->A->n;
207   nzB   = a->B->n;
208   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
209   ncols = 0;
210   for (i=0; i<nzB; i++) {
211     if (cmap[i] < start) idx[ncols++] = cmap[i];
212     else break;
213   }
214   mult->brstart = i;
215   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
216   for (i=mult->brstart; i<nzB; i++) idx[ncols++] = cmap[i];
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr);
218   ierr = PetscFree(idx);CHKERRQ(ierr);
219   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr);
220   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr);
221 
222   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
223   start = a->rstart; end = a->rend;
224   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
225   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr);
226 
227   /* compute C_seq = A_seq * B_seq */
228   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
229 
230   /* create mpi matrix C by concatinating C_seq */
231   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
232   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
233 
234   /* attach the supporting struct to C for reuse of symbolic C */
235   (*C)->spptr         = (void*)mult;
236   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
237 
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
243 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
244 {
245   PetscErrorCode ierr;
246   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
247   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
248   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
249   int            am=A->M,bn=B->N,bm=B->M;
250   int            i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
251   MatScalar      *ca;
252   PetscBT        lnkbt;
253 
254   PetscFunctionBegin;
255   /* Set up */
256   /* Allocate ci array, arrays for fill computation and */
257   /* free space for accumulating nonzero column info */
258   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
259   ci[0] = 0;
260 
261   /* create and initialize a linked list */
262   nlnk = bn+1;
263   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
264 
265   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
266   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
267   current_space = free_space;
268 
269   /* Determine symbolic info for each row of the product: */
270   for (i=0;i<am;i++) {
271     anzi = ai[i+1] - ai[i];
272     cnzi = 0;
273     j    = anzi;
274     aj   = a->j + ai[i];
275     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
276       j--;
277       brow = *(aj + j);
278       bnzj = bi[brow+1] - bi[brow];
279       bjj  = bj + bi[brow];
280       /* add non-zero cols of B into the sorted linked list lnk */
281       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
282       cnzi += nlnk;
283     }
284 
285     /* If free space is not available, make more free space */
286     /* Double the amount of total space in the list */
287     if (current_space->local_remaining<cnzi) {
288       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
289       nspacedouble++;
290     }
291 
292     /* Copy data into free space, then initialize lnk */
293     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
294     current_space->array           += cnzi;
295     current_space->local_used      += cnzi;
296     current_space->local_remaining -= cnzi;
297 
298     ci[i+1] = ci[i] + cnzi;
299   }
300 
301   /* Column indices are in the list of free space */
302   /* Allocate space for cj, initialize cj, and */
303   /* destroy list of free space and other temporary array(s) */
304   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
305   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
306   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
307 
308   /* Allocate space for ca */
309   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
310   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
311 
312   /* put together the new symbolic matrix */
313   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
314 
315   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
316   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
317   c = (Mat_SeqAIJ *)((*C)->data);
318   c->freedata = PETSC_TRUE;
319   c->nonew    = 0;
320 
321   if (nspacedouble){
322     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
323   }
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatMatMultNumeric"
329 /*@
330    MatMatMultNumeric - Performs the numeric matrix-matrix product.
331    Call this routine after first calling MatMatMultSymbolic().
332 
333    Collective on Mat
334 
335    Input Parameters:
336 +  A - the left matrix
337 -  B - the right matrix
338 
339    Output Parameters:
340 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
341 
342    Notes:
343    C must have been created with MatMatMultSymbolic.
344 
345    This routine is currently only implemented for SeqAIJ type matrices.
346 
347    Level: intermediate
348 
349 .seealso: MatMatMult(),MatMatMultSymbolic()
350 @*/
351 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){
352   PetscErrorCode ierr;
353   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
354   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
355 
356   PetscFunctionBegin;
357 
358   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
359   PetscValidType(A,1);
360   MatPreallocated(A);
361   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
362   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
363 
364   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
365   PetscValidType(B,2);
366   MatPreallocated(B);
367   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
368   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
369 
370   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
371   PetscValidType(C,3);
372   MatPreallocated(C);
373   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
374   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
375 
376   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
377   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
378   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
379 
380   /* For now, we do not dispatch based on the type of A and B */
381   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
382   Anumeric = A->ops->matmultnumeric;
383   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
384   Bnumeric = B->ops->matmultnumeric;
385   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
386   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
387 
388   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
389   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
390   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
391 
392   PetscFunctionReturn(0);
393 }
394 
395 /* This routine is called ONLY in the case of reusing previously computed symbolic C */
396 #undef __FUNCT__
397 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
398 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
399 {
400   PetscErrorCode ierr;
401   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr;
402 
403   PetscFunctionBegin;
404   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr)
405   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr);
406   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
407 
408   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
409   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
410 
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
416 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
417 {
418   PetscErrorCode ierr;
419   int        flops=0;
420   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
421   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
422   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
423   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
424   int        am=A->M,cn=C->N;
425   int        i,j,k,anzi,bnzi,cnzi,brow;
426   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
427 
428   PetscFunctionBegin;
429 
430   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
431   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
432   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
433   /* Traverse A row-wise. */
434   /* Build the ith row in C by summing over nonzero columns in A, */
435   /* the rows of B corresponding to nonzeros of A. */
436   for (i=0;i<am;i++) {
437     anzi = ai[i+1] - ai[i];
438     for (j=0;j<anzi;j++) {
439       brow = *aj++;
440       bnzi = bi[brow+1] - bi[brow];
441       bjj  = bj + bi[brow];
442       baj  = ba + bi[brow];
443       for (k=0;k<bnzi;k++) {
444         temp[bjj[k]] += (*aa)*baj[k];
445       }
446       flops += 2*bnzi;
447       aa++;
448     }
449     /* Store row back into C, and re-zero temp */
450     cnzi = ci[i+1] - ci[i];
451     for (j=0;j<cnzi;j++) {
452       ca[j] = temp[cj[j]];
453       temp[cj[j]] = 0.0;
454     }
455     ca += cnzi;
456     cj += cnzi;
457   }
458   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460 
461   /* Free temp */
462   ierr = PetscFree(temp);CHKERRQ(ierr);
463   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 #undef __FUNCT__
468 #define __FUNCT__ "MatMatMultTranspose"
469 /*@
470    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
471 
472    Collective on Mat
473 
474    Input Parameters:
475 +  A - the left matrix
476 .  B - the right matrix
477 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
478 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
479 
480    Output Parameters:
481 .  C - the product matrix
482 
483    Notes:
484    C will be created and must be destroyed by the user with MatDestroy().
485 
486    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
487    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
488 
489    Level: intermediate
490 
491 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
492 @*/
493 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
494 {
495   PetscErrorCode ierr;
496   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
497   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
498 
499   PetscFunctionBegin;
500   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
501   PetscValidType(A,1);
502   MatPreallocated(A);
503   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
504   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
505   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
506   PetscValidType(B,2);
507   MatPreallocated(B);
508   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
509   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
510   PetscValidPointer(C,3);
511   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
512 
513   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
514 
515   fA = A->ops->matmulttranspose;
516   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
517   fB = B->ops->matmulttranspose;
518   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
519   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
520 
521   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
522   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
523   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
524 
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNCT__
529 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
530 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
531   PetscErrorCode ierr;
532 
533   PetscFunctionBegin;
534   if (scall == MAT_INITIAL_MATRIX){
535     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
536   }
537   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
543 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
544 {
545   PetscErrorCode ierr;
546   Mat            At;
547   int            *ati,*atj;
548 
549   PetscFunctionBegin;
550   /* create symbolic At */
551   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
552   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
553 
554   /* get symbolic C=At*B */
555   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
556 
557   /* clean up */
558   ierr = MatDestroy(At);CHKERRQ(ierr);
559   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
560 
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
566 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
567 {
568   PetscErrorCode ierr;
569   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
570   int            am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
571   int            cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
572   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
573 
574   PetscFunctionBegin;
575   /* clear old values in C */
576   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
577 
578   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
579   for (i=0;i<am;i++) {
580     bj   = b->j + bi[i];
581     ba   = b->a + bi[i];
582     bnzi = bi[i+1] - bi[i];
583     anzi = ai[i+1] - ai[i];
584     for (j=0; j<anzi; j++) {
585       nextb = 0;
586       crow  = *aj++;
587       cjj   = cj + ci[crow];
588       caj   = ca + ci[crow];
589       /* perform sparse axpy operation.  Note cjj includes bj. */
590       for (k=0; nextb<bnzi; k++) {
591         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
592           caj[k] += (*aa)*(*(ba+nextb));
593           nextb++;
594         }
595       }
596       flops += 2*bnzi;
597       aa++;
598     }
599   }
600 
601   /* Assemble the final matrix and clean up */
602   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
603   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
604   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607