xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 52e6d16ba989af9362d0fcebb12e73dd7c9666ed)
1 /*
2   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "petscbt.h"
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
13 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
14 {
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   if (scall == MAT_INITIAL_MATRIX){
19     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
20   }
21   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
22   PetscFunctionReturn(0);
23 }
24 
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
28 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
29 {
30   PetscErrorCode ierr;
31   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
32   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
33   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
34   PetscInt       am=A->M,bn=B->N,bm=B->M;
35   PetscInt       i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
36   MatScalar      *ca;
37   PetscBT        lnkbt;
38 
39   PetscFunctionBegin;
40   /* Set up */
41   /* Allocate ci array, arrays for fill computation and */
42   /* free space for accumulating nonzero column info */
43   ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
44   ci[0] = 0;
45 
46   /* create and initialize a linked list */
47   nlnk = bn+1;
48   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
49 
50   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
51   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
52   current_space = free_space;
53 
54   /* Determine symbolic info for each row of the product: */
55   for (i=0;i<am;i++) {
56     anzi = ai[i+1] - ai[i];
57     cnzi = 0;
58     j    = anzi;
59     aj   = a->j + ai[i];
60     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
61       j--;
62       brow = *(aj + j);
63       bnzj = bi[brow+1] - bi[brow];
64       bjj  = bj + bi[brow];
65       /* add non-zero cols of B into the sorted linked list lnk */
66       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
67       cnzi += nlnk;
68     }
69 
70     /* If free space is not available, make more free space */
71     /* Double the amount of total space in the list */
72     if (current_space->local_remaining<cnzi) {
73       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
74       nspacedouble++;
75     }
76 
77     /* Copy data into free space, then initialize lnk */
78     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
79     current_space->array           += cnzi;
80     current_space->local_used      += cnzi;
81     current_space->local_remaining -= cnzi;
82 
83     ci[i+1] = ci[i] + cnzi;
84   }
85 
86   /* Column indices are in the list of free space */
87   /* Allocate space for cj, initialize cj, and */
88   /* destroy list of free space and other temporary array(s) */
89   ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
90   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
91   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
92 
93   /* Allocate space for ca */
94   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
95   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
96 
97   /* put together the new symbolic matrix */
98   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
99 
100   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
101   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
102   c = (Mat_SeqAIJ *)((*C)->data);
103   c->freedata = PETSC_TRUE;
104   c->nonew    = 0;
105 
106   if (nspacedouble){
107     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%D, nnz(A):%D, nnz(B):%D, fill:%g, nnz(C):%D\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
108   }
109   PetscFunctionReturn(0);
110 }
111 
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
115 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
116 {
117   PetscErrorCode ierr;
118   PetscInt       flops=0;
119   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data;
120   Mat_SeqAIJ     *b = (Mat_SeqAIJ *)B->data;
121   Mat_SeqAIJ     *c = (Mat_SeqAIJ *)C->data;
122   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
123   PetscInt       am=A->M,cm=C->M;
124   PetscInt       i,j,k,anzi,bnzi,cnzi,brow,nextb;
125   MatScalar      *aa=a->a,*ba=b->a,*baj,*ca=c->a;
126 
127   PetscFunctionBegin;
128   /* clean old values in C */
129   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
130   /* Traverse A row-wise. */
131   /* Build the ith row in C by summing over nonzero columns in A, */
132   /* the rows of B corresponding to nonzeros of A. */
133   for (i=0;i<am;i++) {
134     anzi = ai[i+1] - ai[i];
135     for (j=0;j<anzi;j++) {
136       brow = *aj++;
137       bnzi = bi[brow+1] - bi[brow];
138       bjj  = bj + bi[brow];
139       baj  = ba + bi[brow];
140       nextb = 0;
141       for (k=0; nextb<bnzi; k++) {
142         if (cj[k] == bjj[nextb]){ /* ccol == bcol */
143           ca[k] += (*aa)*baj[nextb++];
144         }
145       }
146       flops += 2*bnzi;
147       aa++;
148     }
149     cnzi = ci[i+1] - ci[i];
150     ca += cnzi;
151     cj += cnzi;
152   }
153   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
154   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
155 
156   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
157   PetscFunctionReturn(0);
158 }
159 
160 
161 #undef __FUNCT__
162 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
163 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   if (scall == MAT_INITIAL_MATRIX){
168     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
169   }
170   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
176 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
177 {
178   PetscErrorCode ierr;
179   Mat            At;
180   PetscInt       *ati,*atj;
181 
182   PetscFunctionBegin;
183   /* create symbolic At */
184   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
185   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
186 
187   /* get symbolic C=At*B */
188   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
189 
190   /* clean up */
191   ierr = MatDestroy(At);CHKERRQ(ierr);
192   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
193 
194   PetscFunctionReturn(0);
195 }
196 
197 #undef __FUNCT__
198 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
199 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
200 {
201   PetscErrorCode ierr;
202   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
203   PetscInt       am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
204   PetscInt       cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
205   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
206 
207   PetscFunctionBegin;
208   /* clear old values in C */
209   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
210 
211   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
212   for (i=0;i<am;i++) {
213     bj   = b->j + bi[i];
214     ba   = b->a + bi[i];
215     bnzi = bi[i+1] - bi[i];
216     anzi = ai[i+1] - ai[i];
217     for (j=0; j<anzi; j++) {
218       nextb = 0;
219       crow  = *aj++;
220       cjj   = cj + ci[crow];
221       caj   = ca + ci[crow];
222       /* perform sparse axpy operation.  Note cjj includes bj. */
223       for (k=0; nextb<bnzi; k++) {
224         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
225           caj[k] += (*aa)*(*(ba+nextb));
226           nextb++;
227         }
228       }
229       flops += 2*bnzi;
230       aa++;
231     }
232   }
233 
234   /* Assemble the final matrix and clean up */
235   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
236   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
237   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240