xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 7c334f0243eb3f00fc22f2f6efb0b24b1906cf86)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatMatMult"
13 /*@
14    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the left matrix
20 .  B - the right matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of AIJ matrices and classes
31    which inherit from AIJ.  C will be of type MATAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
36 @*/
37 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
41   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
50   PetscValidType(B,2);
51   MatPreallocated(B);
52   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N);
56 
57   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
58 
59   /* For now, we do not dispatch based on the type of A and B */
60   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
61   fA = A->ops->matmult;
62   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
63   fB = B->ops->matmult;
64   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
65   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
66 
67   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
68   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
69   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
70 
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
76 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   if (scall == MAT_INITIAL_MATRIX){
82     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
83   } else if (scall == MAT_REUSE_MATRIX){
84     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
85   } else {
86     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
93 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
94 {
95   PetscErrorCode ierr;
96 
97   PetscFunctionBegin;
98   if (scall == MAT_INITIAL_MATRIX){
99     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
100   }
101   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatMatMultSymbolic"
107 /*@
108    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
109    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
110 
111    Collective on Mat
112 
113    Input Parameters:
114 +  A - the left matrix
115 .  B - the right matrix
116 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
117 
118    Output Parameters:
119 .  C - the matrix containing the ij structure of product matrix
120 
121    Notes:
122    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
123 
124    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
125 
126    Level: intermediate
127 
128 .seealso: MatMatMult(),MatMatMultNumeric()
129 @*/
130 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C)
131 {
132   PetscErrorCode ierr;
133   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
134   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
135 
136   PetscFunctionBegin;
137   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
138   PetscValidType(A,1);
139   MatPreallocated(A);
140   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
141   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
142 
143   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
144   PetscValidType(B,2);
145   MatPreallocated(B);
146   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
147   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
148   PetscValidPointer(C,3);
149 
150   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N);
151   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
152 
153   /* For now, we do not dispatch based on the type of A and P */
154   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
155   Asymbolic = A->ops->matmultsymbolic;
156   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
157   Bsymbolic = B->ops->matmultsymbolic;
158   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
159   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
160 
161   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
162   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
163   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
164 
165   PetscFunctionReturn(0);
166 }
167 
168 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
169 #undef __FUNCT__
170 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
171 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
172 {
173   PetscErrorCode       ierr;
174   Mat_MatMatMultMPI    *mult;
175   PetscObjectContainer container;
176 
177   PetscFunctionBegin;
178   ierr = PetscObjectQuery((PetscObject)A,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
179   if (container) {
180     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
181     ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
182     ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
183     ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
184     ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr);
185     ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr);
186     ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
187 
188     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
189     ierr = PetscObjectCompose((PetscObject)A,"MatMatMultMPI",0);CHKERRQ(ierr);
190   }
191   ierr = PetscFree(mult);CHKERRQ(ierr);
192   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
193 
194   PetscFunctionReturn(0);
195 }
196 
197 #undef __FUNCT__
198 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
199 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
200 {
201   Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
202   PetscErrorCode       ierr;
203   PetscInt             start,end;
204   Mat_MatMatMultMPI    *mult;
205   PetscObjectContainer container;
206 
207   PetscFunctionBegin;
208   if (a->cstart != b->rstart || a->cend != b->rend){
209     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
210   }
211   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
212 
213   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
214   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr);
215 
216   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
217   start = a->rstart; end = a->rend;
218   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
219   ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr);
220 
221   /* compute C_seq = A_seq * B_seq */
222   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
223 
224   /* create mpi matrix C by concatinating C_seq */
225   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
226   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
227 
228   /* attach the supporting struct to C for reuse of symbolic C */
229   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
230   ierr = PetscObjectContainerSetPointer(container,mult);CHKERRQ(ierr);
231   ierr = PetscObjectCompose((PetscObject)(*C),"MatMatMultMPI",(PetscObject)container);CHKERRQ(ierr);
232 
233   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
234 
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
240 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
241 {
242   PetscErrorCode ierr;
243   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
244   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
245   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
246   PetscInt       am=A->M,bn=B->N,bm=B->M;
247   PetscInt       i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
248   MatScalar      *ca;
249   PetscBT        lnkbt;
250 
251   PetscFunctionBegin;
252   /* Set up */
253   /* Allocate ci array, arrays for fill computation and */
254   /* free space for accumulating nonzero column info */
255   ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
256   ci[0] = 0;
257 
258   /* create and initialize a linked list */
259   nlnk = bn+1;
260   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
261 
262   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
263   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
264   current_space = free_space;
265 
266   /* Determine symbolic info for each row of the product: */
267   for (i=0;i<am;i++) {
268     anzi = ai[i+1] - ai[i];
269     cnzi = 0;
270     j    = anzi;
271     aj   = a->j + ai[i];
272     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
273       j--;
274       brow = *(aj + j);
275       bnzj = bi[brow+1] - bi[brow];
276       bjj  = bj + bi[brow];
277       /* add non-zero cols of B into the sorted linked list lnk */
278       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
279       cnzi += nlnk;
280     }
281 
282     /* If free space is not available, make more free space */
283     /* Double the amount of total space in the list */
284     if (current_space->local_remaining<cnzi) {
285       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
286       nspacedouble++;
287     }
288 
289     /* Copy data into free space, then initialize lnk */
290     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
291     current_space->array           += cnzi;
292     current_space->local_used      += cnzi;
293     current_space->local_remaining -= cnzi;
294 
295     ci[i+1] = ci[i] + cnzi;
296   }
297 
298   /* Column indices are in the list of free space */
299   /* Allocate space for cj, initialize cj, and */
300   /* destroy list of free space and other temporary array(s) */
301   ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
302   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
303   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
304 
305   /* Allocate space for ca */
306   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
307   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
308 
309   /* put together the new symbolic matrix */
310   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
311 
312   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
313   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
314   c = (Mat_SeqAIJ *)((*C)->data);
315   c->freedata = PETSC_TRUE;
316   c->nonew    = 0;
317 
318   if (nspacedouble){
319     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%D, nnz(A):%D, nnz(B):%D, fill:%g, nnz(C):%D\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatMatMultNumeric"
326 /*@
327    MatMatMultNumeric - Performs the numeric matrix-matrix product.
328    Call this routine after first calling MatMatMultSymbolic().
329 
330    Collective on Mat
331 
332    Input Parameters:
333 +  A - the left matrix
334 -  B - the right matrix
335 
336    Output Parameters:
337 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
338 
339    Notes:
340    C must have been created with MatMatMultSymbolic.
341 
342    This routine is currently only implemented for SeqAIJ type matrices.
343 
344    Level: intermediate
345 
346 .seealso: MatMatMult(),MatMatMultSymbolic()
347 @*/
348 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C)
349 {
350   PetscErrorCode ierr;
351   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
352   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
353 
354   PetscFunctionBegin;
355 
356   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
357   PetscValidType(A,1);
358   MatPreallocated(A);
359   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
360   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
361 
362   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
363   PetscValidType(B,2);
364   MatPreallocated(B);
365   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
366   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
367 
368   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
369   PetscValidType(C,3);
370   MatPreallocated(C);
371   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
372   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
373 
374   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->N,C->N);
375   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N);
376   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",A->M,C->M);
377 
378   /* For now, we do not dispatch based on the type of A and B */
379   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
380   Anumeric = A->ops->matmultnumeric;
381   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
382   Bnumeric = B->ops->matmultnumeric;
383   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
384   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
385 
386   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
387   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
388   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
389 
390   PetscFunctionReturn(0);
391 }
392 
393 /* This routine is called ONLY in the case of reusing previously computed symbolic C */
394 #undef __FUNCT__
395 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
396 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
397 {
398   PetscErrorCode       ierr;
399   Mat                  *seq;
400   Mat_MatMatMultMPI    *mult;
401   PetscObjectContainer container;
402 
403   PetscFunctionBegin;
404   ierr = PetscObjectQuery((PetscObject)C,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
405   if (container) {
406     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
407   }
408 
409   seq = &mult->B_seq;
410   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
411   mult->B_seq = *seq;
412 
413   seq = &mult->A_loc;
414   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
415   mult->A_loc = *seq;
416 
417   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
418 
419   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
420   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
421 
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
427 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
428 {
429   PetscErrorCode ierr;
430   PetscInt       flops=0;
431   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data;
432   Mat_SeqAIJ     *b = (Mat_SeqAIJ *)B->data;
433   Mat_SeqAIJ     *c = (Mat_SeqAIJ *)C->data;
434   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
435   PetscInt       am=A->M,cm=C->M;
436   PetscInt       i,j,k,anzi,bnzi,cnzi,brow,nextb;
437   MatScalar      *aa=a->a,*ba=b->a,*baj,*ca=c->a;
438 
439   PetscFunctionBegin;
440   /* clean old values in C */
441   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
442   /* Traverse A row-wise. */
443   /* Build the ith row in C by summing over nonzero columns in A, */
444   /* the rows of B corresponding to nonzeros of A. */
445   for (i=0;i<am;i++) {
446     anzi = ai[i+1] - ai[i];
447     for (j=0;j<anzi;j++) {
448       brow = *aj++;
449       bnzi = bi[brow+1] - bi[brow];
450       bjj  = bj + bi[brow];
451       baj  = ba + bi[brow];
452       nextb = 0;
453       for (k=0; nextb<bnzi; k++) {
454         if (cj[k] == bjj[nextb]){ /* ccol == bcol */
455           ca[k] += (*aa)*baj[nextb++];
456         }
457       }
458       flops += 2*bnzi;
459       aa++;
460     }
461     cnzi = ci[i+1] - ci[i];
462     ca += cnzi;
463     cj += cnzi;
464   }
465   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467 
468   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMatMultTranspose"
474 /*@
475    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
476 
477    Collective on Mat
478 
479    Input Parameters:
480 +  A - the left matrix
481 .  B - the right matrix
482 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
483 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
484 
485    Output Parameters:
486 .  C - the product matrix
487 
488    Notes:
489    C will be created and must be destroyed by the user with MatDestroy().
490 
491    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
492    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
493 
494    Level: intermediate
495 
496 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
497 @*/
498 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
499 {
500   PetscErrorCode ierr;
501   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
502   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
503 
504   PetscFunctionBegin;
505   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
506   PetscValidType(A,1);
507   MatPreallocated(A);
508   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
509   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
510   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
511   PetscValidType(B,2);
512   MatPreallocated(B);
513   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
514   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
515   PetscValidPointer(C,3);
516   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->M);
517 
518   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
519 
520   fA = A->ops->matmulttranspose;
521   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
522   fB = B->ops->matmulttranspose;
523   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
524   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
525 
526   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
527   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
528   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
529 
530   PetscFunctionReturn(0);
531 }
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
535 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
536   PetscErrorCode ierr;
537 
538   PetscFunctionBegin;
539   if (scall == MAT_INITIAL_MATRIX){
540     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
541   }
542   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
548 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
549 {
550   PetscErrorCode ierr;
551   Mat            At;
552   PetscInt       *ati,*atj;
553 
554   PetscFunctionBegin;
555   /* create symbolic At */
556   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
557   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
558 
559   /* get symbolic C=At*B */
560   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
561 
562   /* clean up */
563   ierr = MatDestroy(At);CHKERRQ(ierr);
564   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
571 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
572 {
573   PetscErrorCode ierr;
574   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
575   PetscInt       am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
576   PetscInt       cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
577   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
578 
579   PetscFunctionBegin;
580   /* clear old values in C */
581   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
582 
583   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
584   for (i=0;i<am;i++) {
585     bj   = b->j + bi[i];
586     ba   = b->a + bi[i];
587     bnzi = bi[i+1] - bi[i];
588     anzi = ai[i+1] - ai[i];
589     for (j=0; j<anzi; j++) {
590       nextb = 0;
591       crow  = *aj++;
592       cjj   = cj + ci[crow];
593       caj   = ca + ci[crow];
594       /* perform sparse axpy operation.  Note cjj includes bj. */
595       for (k=0; nextb<bnzi; k++) {
596         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
597           caj[k] += (*aa)*(*(ba+nextb));
598           nextb++;
599         }
600       }
601       flops += 2*bnzi;
602       aa++;
603     }
604   }
605 
606   /* Assemble the final matrix and clean up */
607   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
608   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
609   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612