xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 2d09714cf306bca2324d75942e9b6c7f4c8cb59e)
1d50806bdSBarry Smith /*
22c9ce0e5SKris Buschelman   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
8*2d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9d50806bdSBarry Smith 
101c239cc6SHong Zhang /*
111c239cc6SHong Zhang   Add a index set into a sorted linked list
121c239cc6SHong Zhang   input:
131c239cc6SHong Zhang     nidx      - number of input indices
141c239cc6SHong Zhang     indices   - interger array
151c239cc6SHong Zhang     idx_head  - the header of the list
161c239cc6SHong Zhang     idx_unset - the value indicating the entry in the list is not set yet
171c239cc6SHong Zhang     lnk       - linked list(an integer array) that is created
181c239cc6SHong Zhang   output:
191c239cc6SHong Zhang     nlnk      - number of newly added indices
201c239cc6SHong Zhang     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
211c239cc6SHong Zhang  */
221c239cc6SHong Zhang #undef __FUNCT__
231c239cc6SHong Zhang #define __FUNCT__ "LnklistAdd"
241c239cc6SHong Zhang int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk)
251c239cc6SHong Zhang {
261c239cc6SHong Zhang   int i,idx,lidx,entry,n;
271c239cc6SHong Zhang 
281c239cc6SHong Zhang   PetscFunctionBegin;
291c239cc6SHong Zhang   n    = 0;
301c239cc6SHong Zhang   lidx = idx_head;
311c239cc6SHong Zhang   i    = nidx;
321c239cc6SHong Zhang   while (i){
33*2d09714cSHong Zhang     /* assume indices are almost in increasing order, starting from its end saves computation */
341c239cc6SHong Zhang     entry = indices[--i];
351c239cc6SHong Zhang     if (lnk[entry] == idx_unset) { /* new entry */
361c239cc6SHong Zhang       do {
371c239cc6SHong Zhang         idx = lidx;
381c239cc6SHong Zhang         lidx  = lnk[idx];
391c239cc6SHong Zhang       } while (entry > lidx);
401c239cc6SHong Zhang       lnk[idx] = entry;
411c239cc6SHong Zhang       lnk[entry] = lidx;
421c239cc6SHong Zhang       n++;
431c239cc6SHong Zhang     }
441c239cc6SHong Zhang   }
451c239cc6SHong Zhang   *nlnk = n;
461c239cc6SHong Zhang   PetscFunctionReturn(0);
471c239cc6SHong Zhang }
481c239cc6SHong Zhang 
491c239cc6SHong Zhang 
502216b3a4SKris Buschelman static int logkey_matmatmult          = 0;
512216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0;
522216b3a4SKris Buschelman static int logkey_matmatmult_numeric  = 0;
532216b3a4SKris Buschelman 
54c1f4806aSKris Buschelman #undef __FUNCT__
55c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
565c66b693SKris Buschelman /*@
575c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
5894e3eecaSKris Buschelman 
595c66b693SKris Buschelman    Collective on Mat
60d50806bdSBarry Smith 
615c66b693SKris Buschelman    Input Parameters:
625c66b693SKris Buschelman +  A - the left matrix
635c66b693SKris Buschelman -  B - the right matrix
645c66b693SKris Buschelman 
655c66b693SKris Buschelman    Output Parameters:
665c66b693SKris Buschelman .  C - the product matrix
675c66b693SKris Buschelman 
685c66b693SKris Buschelman    Notes:
695c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
705c66b693SKris Buschelman 
714d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
724d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
735c66b693SKris Buschelman 
745c66b693SKris Buschelman    Level: intermediate
755c66b693SKris Buschelman 
765c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
775c66b693SKris Buschelman @*/
78*2d09714cSHong Zhang int MatMatMult(Mat A,Mat B, Mat *C)
79*2d09714cSHong Zhang {
80d50806bdSBarry Smith   int  ierr;
81*2d09714cSHong Zhang 
82d50806bdSBarry Smith   PetscFunctionBegin;
834482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
84c9780b6fSBarry Smith   PetscValidType(A,1);
855c66b693SKris Buschelman   MatPreallocated(A);
865c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
875c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
88d50806bdSBarry Smith 
894482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
90c9780b6fSBarry Smith   PetscValidType(B,2);
915c66b693SKris Buschelman   MatPreallocated(B);
925c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
935c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
94d50806bdSBarry Smith 
954482741eSBarry Smith   PetscValidPointer(C,3);
964482741eSBarry Smith 
975c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
98d50806bdSBarry Smith 
996284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
1006284ec50SHong Zhang   ierr = (*A->ops->matmult)(A,B,C);CHKERRQ(ierr);
1016284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
1024d3841fdSKris Buschelman 
1036284ec50SHong Zhang   PetscFunctionReturn(0);
1046284ec50SHong Zhang }
1056284ec50SHong Zhang 
1066284ec50SHong Zhang #undef __FUNCT__
1076284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
108*2d09714cSHong Zhang int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B, Mat *C)
109*2d09714cSHong Zhang {
110*2d09714cSHong Zhang   Mat           *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,*cseq,C_seq,C_mpi;
111*2d09714cSHong Zhang   Mat_MPIAIJ    *a = (Mat_MPIAIJ*)A->data;
112*2d09714cSHong Zhang   Mat_SeqAIJ    *c;
113*2d09714cSHong Zhang   MatScalar     *ca;
114*2d09714cSHong Zhang   int           ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap,*ci,*cj,grow,*d_nnz,*o_nnz;
115*2d09714cSHong Zhang   IS            isrow,iscol;
1166284ec50SHong Zhang 
1176284ec50SHong Zhang   PetscFunctionBegin;
118*2d09714cSHong Zhang   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
119*2d09714cSHong Zhang   start = a->cstart;
120*2d09714cSHong Zhang   cmap  = a->garray;
121*2d09714cSHong Zhang   nzA   = a->A->n;
122*2d09714cSHong Zhang   nzB   = a->B->n;
123*2d09714cSHong Zhang   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
124*2d09714cSHong Zhang   ncols = 0;
125*2d09714cSHong Zhang   for (i=0; i<nzB; i++) {
126*2d09714cSHong Zhang     if (cmap[i] < start) idx[ncols++] = cmap[i];
127*2d09714cSHong Zhang     else break;
128*2d09714cSHong Zhang   }
129*2d09714cSHong Zhang   imark = i;
130*2d09714cSHong Zhang   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
131*2d09714cSHong Zhang   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
132*2d09714cSHong Zhang   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */
133*2d09714cSHong Zhang   ierr = PetscFree(idx);CHKERRQ(ierr);
134*2d09714cSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr);
135*2d09714cSHong Zhang   ierr = MatGetSubMatrices(B,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&bseq);CHKERRQ(ierr);
136*2d09714cSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
137*2d09714cSHong Zhang   B_seq = bseq[0];
1386284ec50SHong Zhang 
139*2d09714cSHong Zhang   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
140*2d09714cSHong Zhang   start = a->rstart; end = a->rend;
141*2d09714cSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */
142*2d09714cSHong Zhang   ierr = MatGetSubMatrices(A,1,&iscol,&isrow,MAT_INITIAL_MATRIX,&aseq);CHKERRQ(ierr);
143*2d09714cSHong Zhang   ierr = ISDestroy(isrow);CHKERRQ(ierr);
144*2d09714cSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
145*2d09714cSHong Zhang   A_seq = aseq[0];
146*2d09714cSHong Zhang 
147*2d09714cSHong Zhang   /* compute C_seq = A_seq * B_seq */
148*2d09714cSHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq, B_seq, &C_seq);CHKERRQ(ierr);
149*2d09714cSHong Zhang   /*
150*2d09714cSHong Zhang   int rank;
151*2d09714cSHong Zhang   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
152*2d09714cSHong Zhang   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n);
153*2d09714cSHong Zhang   */
154*2d09714cSHong Zhang   ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr);
155*2d09714cSHong Zhang   ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr);
156*2d09714cSHong Zhang 
157*2d09714cSHong Zhang   /* create a mpi matrix C_mpi that has C_seq as its local entries */
158*2d09714cSHong Zhang   ierr = MatCreate(A->comm,C_seq->m,PETSC_DECIDE,PETSC_DECIDE,C_seq->N,&C_mpi);CHKERRQ(ierr);
159*2d09714cSHong Zhang   ierr = MatSetType(C_mpi,MATMPIAIJ);CHKERRQ(ierr);
160*2d09714cSHong Zhang 
161*2d09714cSHong Zhang   c  = (Mat_SeqAIJ*)C_seq->data;
162*2d09714cSHong Zhang   ci = c->i; cj = c->j; ca = c->a;
163*2d09714cSHong Zhang   ierr = PetscMalloc((2*C_seq->m+1)*sizeof(int),&d_nnz);CHKERRQ(ierr);
164*2d09714cSHong Zhang   o_nnz = d_nnz + C_seq->m;
165*2d09714cSHong Zhang   nzA   = end-start;    /* max nonezero cols of the local diagonal part of C_mpi */
166*2d09714cSHong Zhang   nzB   = C_seq->n-nzA; /* max nonezero cols of the local off-diagonal part of C_mpi */
167*2d09714cSHong Zhang   for (i=0; i< C_seq->m; i++){
168*2d09714cSHong Zhang     ncols = ci[i+1] - ci[i];
169*2d09714cSHong Zhang     d_nnz[i] = PetscMin(ncols,nzA);
170*2d09714cSHong Zhang     o_nnz[i] = PetscMin(ncols,nzB);
171*2d09714cSHong Zhang   }
172*2d09714cSHong Zhang   ierr = MatMPIAIJSetPreallocation(C_mpi,PETSC_DECIDE,d_nnz,PETSC_DECIDE,o_nnz);CHKERRQ(ierr);
173*2d09714cSHong Zhang   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
174*2d09714cSHong Zhang 
175*2d09714cSHong Zhang   /* set row values of C_mpi */
176*2d09714cSHong Zhang   for (i=0; i< C_seq->m; i++){
177*2d09714cSHong Zhang     grow  = start + i;
178*2d09714cSHong Zhang     ncols = ci[i+1] - ci[i];
179*2d09714cSHong Zhang     ierr = MatSetValues(C_mpi,1,&grow,ncols,cj+ci[i],ca+ci[i],INSERT_VALUES);CHKERRQ(ierr);
180*2d09714cSHong Zhang   }
181*2d09714cSHong Zhang   ierr = MatAssemblyBegin(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
182*2d09714cSHong Zhang   ierr = MatAssemblyEnd(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
183*2d09714cSHong Zhang   ierr = MatDestroy(C_seq);CHKERRQ(ierr);
184*2d09714cSHong Zhang 
185*2d09714cSHong Zhang   *C = C_mpi;
186d50806bdSBarry Smith   PetscFunctionReturn(0);
187d50806bdSBarry Smith }
1885c66b693SKris Buschelman 
1895c66b693SKris Buschelman #undef __FUNCT__
1905c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
1915c66b693SKris Buschelman int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) {
1925c66b693SKris Buschelman   int ierr;
1934d3841fdSKris Buschelman   char symfunct[80],numfunct[80];
1945c66b693SKris Buschelman   int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat);
1955c66b693SKris Buschelman 
1965c66b693SKris Buschelman   PetscFunctionBegin;
1974d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
1984d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
1994d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
2004d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2014d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2025c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2034d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2044d3841fdSKris Buschelman 
2054d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
2065c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
2074d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2084d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
2094d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2104d3841fdSKris Buschelman 
2115c66b693SKris Buschelman   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
2125c66b693SKris Buschelman   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
2135c66b693SKris Buschelman   ierr = (*numeric)(A,B,*C);CHKERRQ(ierr);
2145c66b693SKris Buschelman   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
2155c66b693SKris Buschelman   PetscFunctionReturn(0);
2165c66b693SKris Buschelman }
2175c66b693SKris Buschelman 
218c1f4806aSKris Buschelman #undef __FUNCT__
219c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
2205c66b693SKris Buschelman /*@
2215c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
2225c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
2235c66b693SKris Buschelman 
2245c66b693SKris Buschelman    Collective on Mat
2255c66b693SKris Buschelman 
2265c66b693SKris Buschelman    Input Parameters:
2275c66b693SKris Buschelman +  A - the left matrix
2285c66b693SKris Buschelman -  B - the right matrix
2295c66b693SKris Buschelman 
2305c66b693SKris Buschelman    Output Parameters:
2315c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
2325c66b693SKris Buschelman 
2335c66b693SKris Buschelman    Notes:
2344d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
2355c66b693SKris Buschelman 
2364d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
2375c66b693SKris Buschelman 
2385c66b693SKris Buschelman    Level: intermediate
2395c66b693SKris Buschelman 
2405c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
2415c66b693SKris Buschelman @*/
2425c66b693SKris Buschelman int MatMatMultSymbolic(Mat A,Mat B,Mat *C) {
2435c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
2445c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
2455c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
2465c66b693SKris Buschelman   int  ierr;
2474d3841fdSKris Buschelman   char symfunct[80];
2485c66b693SKris Buschelman   int  (*symbolic)(Mat,Mat,Mat *);
2495c66b693SKris Buschelman 
2505c66b693SKris Buschelman   PetscFunctionBegin;
2514482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
252c9780b6fSBarry Smith   PetscValidType(A,1);
2535c66b693SKris Buschelman   MatPreallocated(A);
2545c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2555c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2565c66b693SKris Buschelman 
2574482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
258c9780b6fSBarry Smith   PetscValidType(B,2);
2595c66b693SKris Buschelman   MatPreallocated(B);
2605c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2615c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2624482741eSBarry Smith   PetscValidPointer(C,3);
2634482741eSBarry Smith 
2645c66b693SKris Buschelman 
2655c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
2665c66b693SKris Buschelman 
2674d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
2684d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
2694d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
2704d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2714d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2724d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2734d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2745c66b693SKris Buschelman   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
2755c66b693SKris Buschelman 
2765c66b693SKris Buschelman   PetscFunctionReturn(0);
2775c66b693SKris Buschelman }
27858c24d83SHong Zhang 
27958c24d83SHong Zhang #undef __FUNCT__
28058c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
28158c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
28258c24d83SHong Zhang {
28358c24d83SHong Zhang   int            ierr;
28458c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
28558c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
28658c24d83SHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
2871c239cc6SHong Zhang   int            *ci,*cj,*lnk,idx0,idx;
2885c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
289bf812060SBarry Smith   int            i,j,anzi,brow,bnzj,cnzi,nlnk;
29058c24d83SHong Zhang   MatScalar      *ca;
29158c24d83SHong Zhang 
29258c24d83SHong Zhang   PetscFunctionBegin;
2935c66b693SKris Buschelman   /* Start timers */
29458c24d83SHong Zhang   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
29558c24d83SHong Zhang   /* Set up */
29658c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
29758c24d83SHong Zhang   /* free space for accumulating nonzero column info */
29858c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
29958c24d83SHong Zhang   ci[0] = 0;
30058c24d83SHong Zhang 
30158c24d83SHong Zhang   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
30258c24d83SHong Zhang   for (i=0; i<bn; i++) lnk[i] = -1;
30358c24d83SHong Zhang 
30458c24d83SHong Zhang   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
30558c24d83SHong Zhang   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
30658c24d83SHong Zhang   current_space = free_space;
30758c24d83SHong Zhang 
30858c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
30958c24d83SHong Zhang   for (i=0;i<am;i++) {
31058c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
31158c24d83SHong Zhang     cnzi = 0;
31258c24d83SHong Zhang     lnk[bn] = bn;
313*2d09714cSHong Zhang     j       = anzi;
314*2d09714cSHong Zhang     aj      = a->j + ai[i];
315*2d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
316*2d09714cSHong Zhang       j--;
317*2d09714cSHong Zhang       brow = *(aj + j);
31858c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
31958c24d83SHong Zhang       bjj  = bj + bi[brow];
3201c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
3211c239cc6SHong Zhang       ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr);
3221c239cc6SHong Zhang       cnzi += nlnk;
32358c24d83SHong Zhang     }
32458c24d83SHong Zhang 
32558c24d83SHong Zhang     /* If free space is not available, make more free space */
32658c24d83SHong Zhang     /* Double the amount of total space in the list */
32758c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
32858c24d83SHong Zhang       printf("...%d -th row, double space ...\n",i);
32958c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
33058c24d83SHong Zhang     }
33158c24d83SHong Zhang 
33258c24d83SHong Zhang     /* Copy data into free space, and zero out denserow and lnk */
33358c24d83SHong Zhang     idx = bn;
33458c24d83SHong Zhang     for (j=0; j<cnzi; j++){
33558c24d83SHong Zhang       idx0 = idx;
33658c24d83SHong Zhang       idx  = lnk[idx0];
33758c24d83SHong Zhang       *current_space->array++ = idx;
33858c24d83SHong Zhang       lnk[idx0] = -1;
33958c24d83SHong Zhang     }
34058c24d83SHong Zhang     lnk[idx] = -1;
34158c24d83SHong Zhang 
34258c24d83SHong Zhang     current_space->local_used      += cnzi;
34358c24d83SHong Zhang     current_space->local_remaining -= cnzi;
34458c24d83SHong Zhang 
34558c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
34658c24d83SHong Zhang   }
34758c24d83SHong Zhang 
34858c24d83SHong Zhang   /* Column indices are in the list of free space */
34958c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
35058c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
35158c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
35258c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
35358c24d83SHong Zhang   ierr = PetscFree(lnk);CHKERRQ(ierr);
35458c24d83SHong Zhang 
35558c24d83SHong Zhang   /* Allocate space for ca */
35658c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
35758c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
35858c24d83SHong Zhang 
35958c24d83SHong Zhang   /* put together the new matrix */
36058c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
36158c24d83SHong Zhang 
36258c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
36358c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
36458c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
36558c24d83SHong Zhang   c->freedata = PETSC_TRUE;
36658c24d83SHong Zhang   c->nonew    = 0;
36758c24d83SHong Zhang 
36858c24d83SHong Zhang   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
36958c24d83SHong Zhang   PetscFunctionReturn(0);
37058c24d83SHong Zhang }
371d50806bdSBarry Smith 
372c1f4806aSKris Buschelman #undef __FUNCT__
373c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3745c66b693SKris Buschelman /*@
3755c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3765c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3775c66b693SKris Buschelman 
3785c66b693SKris Buschelman    Collective on Mat
3795c66b693SKris Buschelman 
3805c66b693SKris Buschelman    Input Parameters:
3815c66b693SKris Buschelman +  A - the left matrix
3825c66b693SKris Buschelman -  B - the right matrix
3835c66b693SKris Buschelman 
3845c66b693SKris Buschelman    Output Parameters:
3855c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3865c66b693SKris Buschelman 
3875c66b693SKris Buschelman    Notes:
3885c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3895c66b693SKris Buschelman 
3905c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3915c66b693SKris Buschelman 
3925c66b693SKris Buschelman    Level: intermediate
3935c66b693SKris Buschelman 
3945c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3955c66b693SKris Buschelman @*/
3965c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){
3975c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
3985c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
3995c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
4005c66b693SKris Buschelman   int ierr;
4014d3841fdSKris Buschelman   char numfunct[80];
4025c66b693SKris Buschelman   int (*numeric)(Mat,Mat,Mat);
4035c66b693SKris Buschelman 
4045c66b693SKris Buschelman   PetscFunctionBegin;
4055c66b693SKris Buschelman 
4064482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
407c9780b6fSBarry Smith   PetscValidType(A,1);
4085c66b693SKris Buschelman   MatPreallocated(A);
4095c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4105c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4115c66b693SKris Buschelman 
4124482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
413c9780b6fSBarry Smith   PetscValidType(B,2);
4145c66b693SKris Buschelman   MatPreallocated(B);
4155c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4165c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4175c66b693SKris Buschelman 
4184482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
419c9780b6fSBarry Smith   PetscValidType(C,3);
4205c66b693SKris Buschelman   MatPreallocated(C);
4215c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4225c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4235c66b693SKris Buschelman 
4245c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
4255c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
4265c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
4275c66b693SKris Buschelman 
4284d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
4294d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
4304d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
4314d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
4324d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
4334d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
4344d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
4354d3841fdSKris Buschelman 
4365c66b693SKris Buschelman   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
4375c66b693SKris Buschelman 
4385c66b693SKris Buschelman   PetscFunctionReturn(0);
4395c66b693SKris Buschelman }
4405c66b693SKris Buschelman 
441d50806bdSBarry Smith #undef __FUNCT__
44294e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
44394e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
444d50806bdSBarry Smith {
44594e3eecaSKris Buschelman   int        ierr,flops=0;
446d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
447d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
448d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
449d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
4505c66b693SKris Buschelman   int        am=A->M,cn=C->N;
45194e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
452d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
453d50806bdSBarry Smith 
454d50806bdSBarry Smith   PetscFunctionBegin;
455d50806bdSBarry Smith 
4565c66b693SKris Buschelman   /* Start timers */
457d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
45894e3eecaSKris Buschelman 
459d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
460d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
461d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
462d50806bdSBarry Smith   /* Traverse A row-wise. */
463d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
464d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
465d50806bdSBarry Smith   for (i=0;i<am;i++) {
466d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
467d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
468d50806bdSBarry Smith       brow = *aj++;
469d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
470d50806bdSBarry Smith       bjj  = bj + bi[brow];
471d50806bdSBarry Smith       baj  = ba + bi[brow];
472d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
473d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
474d50806bdSBarry Smith       }
475d50806bdSBarry Smith       flops += 2*bnzi;
476d50806bdSBarry Smith       aa++;
477d50806bdSBarry Smith     }
478d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
479d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
480d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
481d50806bdSBarry Smith       ca[j] = temp[cj[j]];
482d50806bdSBarry Smith       temp[cj[j]] = 0.0;
483d50806bdSBarry Smith     }
484d50806bdSBarry Smith     ca += cnzi;
485d50806bdSBarry Smith     cj += cnzi;
486d50806bdSBarry Smith   }
487716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
488716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
489716bacf3SKris Buschelman 
490d50806bdSBarry Smith   /* Free temp */
491d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
492d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
493d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
494d50806bdSBarry Smith   PetscFunctionReturn(0);
495d50806bdSBarry Smith }
496d50806bdSBarry Smith 
497d50806bdSBarry Smith #undef __FUNCT__
4985c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
4995c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) {
500d50806bdSBarry Smith   int ierr;
501d50806bdSBarry Smith 
502d50806bdSBarry Smith   PetscFunctionBegin;
5036284ec50SHong Zhang #ifdef OLD
5042216b3a4SKris Buschelman   if (!logkey_matmatmult) {
5052216b3a4SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
5062216b3a4SKris Buschelman   }
5075c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij",
5085c66b693SKris Buschelman                                            "MatMatMult_SeqAIJ_SeqAIJ",
5095c66b693SKris Buschelman                                            MatMatMult_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
5106284ec50SHong Zhang #endif
5115c66b693SKris Buschelman   if (!logkey_matmatmult_symbolic) {
5125c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
513d50806bdSBarry Smith   }
5145c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
5155c66b693SKris Buschelman                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
5165c66b693SKris Buschelman                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
5175c66b693SKris Buschelman   if (!logkey_matmatmult_numeric) {
5185c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
51994e3eecaSKris Buschelman   }
5205c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
5215c66b693SKris Buschelman                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
5225c66b693SKris Buschelman                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
52394e3eecaSKris Buschelman   PetscFunctionReturn(0);
52494e3eecaSKris Buschelman }
525