xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision e005ede52eafe2fed2813abb7a7eb3df04d5f886)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatMatMult"
13 /*@
14    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the left matrix
20 .  B - the right matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of AIJ matrices and classes
31    which inherit from AIJ.  C will be of type MATAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
36 @*/
37 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
41   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
50   PetscValidType(B,2);
51   MatPreallocated(B);
52   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
56 
57   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
58 
59   /* For now, we do not dispatch based on the type of A and B */
60   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
61   fA = A->ops->matmult;
62   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
63   fB = B->ops->matmult;
64   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
65   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
66 
67   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
68   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
69   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
70 
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
76 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   if (scall == MAT_INITIAL_MATRIX){
82     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
83   } else if (scall == MAT_REUSE_MATRIX){
84     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
85   } else {
86     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
93 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
94 {
95   PetscErrorCode ierr;
96 
97   PetscFunctionBegin;
98   if (scall == MAT_INITIAL_MATRIX){
99     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
100   }
101   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatMatMultSymbolic"
107 /*@
108    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
109    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
110 
111    Collective on Mat
112 
113    Input Parameters:
114 +  A - the left matrix
115 .  B - the right matrix
116 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
117 
118    Output Parameters:
119 .  C - the matrix containing the ij structure of product matrix
120 
121    Notes:
122    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
123 
124    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
125 
126    Level: intermediate
127 
128 .seealso: MatMatMult(),MatMatMultNumeric()
129 @*/
130 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C)
131 {
132   PetscErrorCode ierr;
133   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
134   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
135 
136   PetscFunctionBegin;
137   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
138   PetscValidType(A,1);
139   MatPreallocated(A);
140   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
141   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
142 
143   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
144   PetscValidType(B,2);
145   MatPreallocated(B);
146   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
147   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
148   PetscValidPointer(C,3);
149 
150   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
151   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
152 
153   /* For now, we do not dispatch based on the type of A and P */
154   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
155   Asymbolic = A->ops->matmultsymbolic;
156   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
157   Bsymbolic = B->ops->matmultsymbolic;
158   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
159   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
160 
161   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
162   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
163   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
164 
165   PetscFunctionReturn(0);
166 }
167 
168 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
169 #undef __FUNCT__
170 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
171 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
172 {
173   PetscErrorCode       ierr;
174   Mat_MatMatMultMPI    *mult;
175   PetscObjectContainer container;
176 
177   PetscFunctionBegin;
178   ierr = PetscObjectQuery((PetscObject)A,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
179   if (container) {
180     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
181     ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
182     ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
183     ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
184     ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr);
185     ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr);
186     ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
187 
188     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
189     ierr = PetscObjectCompose((PetscObject)A,"MatMatMultMPI",0);CHKERRQ(ierr);
190   }
191   ierr = PetscFree(mult);CHKERRQ(ierr);
192   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
193 
194   PetscFunctionReturn(0);
195 }
196 
197 #undef __FUNCT__
198 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
199 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
200 {
201   Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
202   PetscErrorCode       ierr;
203   PetscInt             start,end;
204   Mat_MatMatMultMPI    *mult;
205   PetscObjectContainer container;
206 
207   PetscFunctionBegin;
208   if (a->cstart != b->rstart || a->cend != b->rend){
209     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
210   }
211   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
212 
213   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
214   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr);
215 
216   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
217   start = a->rstart; end = a->rend;
218   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
219   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr);
220 
221   /* compute C_seq = A_seq * B_seq */
222   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
223 
224   /* create mpi matrix C by concatinating C_seq */
225   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
226   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
227 
228   /* attach the supporting struct to C for reuse of symbolic C */
229   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
230   ierr = PetscObjectContainerSetPointer(container,mult);CHKERRQ(ierr);
231   ierr = PetscObjectCompose((PetscObject)(*C),"MatMatMultMPI",(PetscObject)container);CHKERRQ(ierr);
232 
233   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
234 
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
240 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
241 {
242   PetscErrorCode ierr;
243   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
244   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
245   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
246   PetscInt       am=A->M,bn=B->N,bm=B->M;
247   PetscInt       i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
248   MatScalar      *ca;
249   PetscBT        lnkbt;
250 
251   PetscFunctionBegin;
252   /* Set up */
253   /* Allocate ci array, arrays for fill computation and */
254   /* free space for accumulating nonzero column info */
255   ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
256   ci[0] = 0;
257 
258   /* create and initialize a linked list */
259   nlnk = bn+1;
260   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
261 
262   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
263   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
264   current_space = free_space;
265 
266   /* Determine symbolic info for each row of the product: */
267   for (i=0;i<am;i++) {
268     anzi = ai[i+1] - ai[i];
269     cnzi = 0;
270     j    = anzi;
271     aj   = a->j + ai[i];
272     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
273       j--;
274       brow = *(aj + j);
275       bnzj = bi[brow+1] - bi[brow];
276       bjj  = bj + bi[brow];
277       /* add non-zero cols of B into the sorted linked list lnk */
278       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
279       cnzi += nlnk;
280     }
281 
282     /* If free space is not available, make more free space */
283     /* Double the amount of total space in the list */
284     if (current_space->local_remaining<cnzi) {
285       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
286       nspacedouble++;
287     }
288 
289     /* Copy data into free space, then initialize lnk */
290     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
291     current_space->array           += cnzi;
292     current_space->local_used      += cnzi;
293     current_space->local_remaining -= cnzi;
294 
295     ci[i+1] = ci[i] + cnzi;
296   }
297 
298   /* Column indices are in the list of free space */
299   /* Allocate space for cj, initialize cj, and */
300   /* destroy list of free space and other temporary array(s) */
301   ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
302   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
303   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
304 
305   /* Allocate space for ca */
306   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
307   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
308 
309   /* put together the new symbolic matrix */
310   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
311 
312   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
313   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
314   c = (Mat_SeqAIJ *)((*C)->data);
315   c->freedata = PETSC_TRUE;
316   c->nonew    = 0;
317 
318   if (nspacedouble){
319     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatMatMultNumeric"
326 /*@
327    MatMatMultNumeric - Performs the numeric matrix-matrix product.
328    Call this routine after first calling MatMatMultSymbolic().
329 
330    Collective on Mat
331 
332    Input Parameters:
333 +  A - the left matrix
334 -  B - the right matrix
335 
336    Output Parameters:
337 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
338 
339    Notes:
340    C must have been created with MatMatMultSymbolic.
341 
342    This routine is currently only implemented for SeqAIJ type matrices.
343 
344    Level: intermediate
345 
346 .seealso: MatMatMult(),MatMatMultSymbolic()
347 @*/
348 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C)
349 {
350   PetscErrorCode ierr;
351   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
352   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
353 
354   PetscFunctionBegin;
355 
356   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
357   PetscValidType(A,1);
358   MatPreallocated(A);
359   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
360   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
361 
362   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
363   PetscValidType(B,2);
364   MatPreallocated(B);
365   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
366   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
367 
368   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
369   PetscValidType(C,3);
370   MatPreallocated(C);
371   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
372   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
373 
374   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
375   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
376   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
377 
378   /* For now, we do not dispatch based on the type of A and B */
379   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
380   Anumeric = A->ops->matmultnumeric;
381   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
382   Bnumeric = B->ops->matmultnumeric;
383   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
384   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
385 
386   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
387   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
388   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
389 
390   PetscFunctionReturn(0);
391 }
392 
393 /* This routine is called ONLY in the case of reusing previously computed symbolic C */
394 #undef __FUNCT__
395 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
396 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
397 {
398   PetscErrorCode       ierr;
399   Mat                  *seq;
400   Mat_MatMatMultMPI    *mult;
401   PetscObjectContainer container;
402 
403   PetscFunctionBegin;
404   ierr = PetscObjectQuery((PetscObject)C,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
405   if (container) {
406     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
407   }
408 
409   seq = &mult->B_seq;
410   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
411   mult->B_seq = *seq;
412 
413   seq = &mult->A_loc;
414   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
415   mult->A_loc = *seq;
416 
417   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
418 
419   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
420   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
421 
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
427 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
428 {
429   PetscErrorCode ierr;
430   PetscInt       flops=0;
431   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data;
432   Mat_SeqAIJ     *b = (Mat_SeqAIJ *)B->data;
433   Mat_SeqAIJ     *c = (Mat_SeqAIJ *)C->data;
434   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
435   PetscInt       am=A->M,cn=C->N;
436   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
437   MatScalar      *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
438 
439   PetscFunctionBegin;
440 
441   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
442   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
443   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
444   /* Traverse A row-wise. */
445   /* Build the ith row in C by summing over nonzero columns in A, */
446   /* the rows of B corresponding to nonzeros of A. */
447   for (i=0;i<am;i++) {
448     anzi = ai[i+1] - ai[i];
449     for (j=0;j<anzi;j++) {
450       brow = *aj++;
451       bnzi = bi[brow+1] - bi[brow];
452       bjj  = bj + bi[brow];
453       baj  = ba + bi[brow];
454       for (k=0;k<bnzi;k++) {
455         temp[bjj[k]] += (*aa)*baj[k];
456       }
457       flops += 2*bnzi;
458       aa++;
459     }
460     /* Store row back into C, and re-zero temp */
461     cnzi = ci[i+1] - ci[i];
462     for (j=0;j<cnzi;j++) {
463       ca[j] = temp[cj[j]];
464       temp[cj[j]] = 0.0;
465     }
466     ca += cnzi;
467     cj += cnzi;
468   }
469   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
471 
472   /* Free temp */
473   ierr = PetscFree(temp);CHKERRQ(ierr);
474   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatMatMultTranspose"
480 /*@
481    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
482 
483    Collective on Mat
484 
485    Input Parameters:
486 +  A - the left matrix
487 .  B - the right matrix
488 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
489 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
490 
491    Output Parameters:
492 .  C - the product matrix
493 
494    Notes:
495    C will be created and must be destroyed by the user with MatDestroy().
496 
497    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
498    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
499 
500    Level: intermediate
501 
502 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
503 @*/
504 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
505 {
506   PetscErrorCode ierr;
507   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
508   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
509 
510   PetscFunctionBegin;
511   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
512   PetscValidType(A,1);
513   MatPreallocated(A);
514   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
515   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
516   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
517   PetscValidType(B,2);
518   MatPreallocated(B);
519   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
520   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
521   PetscValidPointer(C,3);
522   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
523 
524   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
525 
526   fA = A->ops->matmulttranspose;
527   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
528   fB = B->ops->matmulttranspose;
529   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
530   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
531 
532   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
533   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
534   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
535 
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
541 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
542   PetscErrorCode ierr;
543 
544   PetscFunctionBegin;
545   if (scall == MAT_INITIAL_MATRIX){
546     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
547   }
548   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
554 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
555 {
556   PetscErrorCode ierr;
557   Mat            At;
558   PetscInt       *ati,*atj;
559 
560   PetscFunctionBegin;
561   /* create symbolic At */
562   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
563   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
564 
565   /* get symbolic C=At*B */
566   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
567 
568   /* clean up */
569   ierr = MatDestroy(At);CHKERRQ(ierr);
570   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
571 
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
577 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
578 {
579   PetscErrorCode ierr;
580   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
581   PetscInt       am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
582   PetscInt       cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
583   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
584 
585   PetscFunctionBegin;
586   /* clear old values in C */
587   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
588 
589   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
590   for (i=0;i<am;i++) {
591     bj   = b->j + bi[i];
592     ba   = b->a + bi[i];
593     bnzi = bi[i+1] - bi[i];
594     anzi = ai[i+1] - ai[i];
595     for (j=0; j<anzi; j++) {
596       nextb = 0;
597       crow  = *aj++;
598       cjj   = cj + ci[crow];
599       caj   = ca + ci[crow];
600       /* perform sparse axpy operation.  Note cjj includes bj. */
601       for (k=0; nextb<bnzi; k++) {
602         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
603           caj[k] += (*aa)*(*(ba+nextb));
604           nextb++;
605         }
606       }
607       flops += 2*bnzi;
608       aa++;
609     }
610   }
611 
612   /* Assemble the final matrix and clean up */
613   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
614   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
615   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618