1d50806bdSBarry Smith /* 22c9ce0e5SKris Buschelman Defines matrix-matrix product routines for pairs of SeqAIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 8*2d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9d50806bdSBarry Smith 101c239cc6SHong Zhang /* 111c239cc6SHong Zhang Add a index set into a sorted linked list 121c239cc6SHong Zhang input: 131c239cc6SHong Zhang nidx - number of input indices 141c239cc6SHong Zhang indices - interger array 151c239cc6SHong Zhang idx_head - the header of the list 161c239cc6SHong Zhang idx_unset - the value indicating the entry in the list is not set yet 171c239cc6SHong Zhang lnk - linked list(an integer array) that is created 181c239cc6SHong Zhang output: 191c239cc6SHong Zhang nlnk - number of newly added indices 201c239cc6SHong Zhang lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices 211c239cc6SHong Zhang */ 221c239cc6SHong Zhang #undef __FUNCT__ 231c239cc6SHong Zhang #define __FUNCT__ "LnklistAdd" 241c239cc6SHong Zhang int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk) 251c239cc6SHong Zhang { 261c239cc6SHong Zhang int i,idx,lidx,entry,n; 271c239cc6SHong Zhang 281c239cc6SHong Zhang PetscFunctionBegin; 291c239cc6SHong Zhang n = 0; 301c239cc6SHong Zhang lidx = idx_head; 311c239cc6SHong Zhang i = nidx; 321c239cc6SHong Zhang while (i){ 33*2d09714cSHong Zhang /* assume indices are almost in increasing order, starting from its end saves computation */ 341c239cc6SHong Zhang entry = indices[--i]; 351c239cc6SHong Zhang if (lnk[entry] == idx_unset) { /* new entry */ 361c239cc6SHong Zhang do { 371c239cc6SHong Zhang idx = lidx; 381c239cc6SHong Zhang lidx = lnk[idx]; 391c239cc6SHong Zhang } while (entry > lidx); 401c239cc6SHong Zhang lnk[idx] = entry; 411c239cc6SHong Zhang lnk[entry] = lidx; 421c239cc6SHong Zhang n++; 431c239cc6SHong Zhang } 441c239cc6SHong Zhang } 451c239cc6SHong Zhang *nlnk = n; 461c239cc6SHong Zhang PetscFunctionReturn(0); 471c239cc6SHong Zhang } 481c239cc6SHong Zhang 491c239cc6SHong Zhang 502216b3a4SKris Buschelman static int logkey_matmatmult = 0; 512216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0; 522216b3a4SKris Buschelman static int logkey_matmatmult_numeric = 0; 532216b3a4SKris Buschelman 54c1f4806aSKris Buschelman #undef __FUNCT__ 55c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 565c66b693SKris Buschelman /*@ 575c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 5894e3eecaSKris Buschelman 595c66b693SKris Buschelman Collective on Mat 60d50806bdSBarry Smith 615c66b693SKris Buschelman Input Parameters: 625c66b693SKris Buschelman + A - the left matrix 635c66b693SKris Buschelman - B - the right matrix 645c66b693SKris Buschelman 655c66b693SKris Buschelman Output Parameters: 665c66b693SKris Buschelman . C - the product matrix 675c66b693SKris Buschelman 685c66b693SKris Buschelman Notes: 695c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 705c66b693SKris Buschelman 714d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 724d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 735c66b693SKris Buschelman 745c66b693SKris Buschelman Level: intermediate 755c66b693SKris Buschelman 765c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 775c66b693SKris Buschelman @*/ 78*2d09714cSHong Zhang int MatMatMult(Mat A,Mat B, Mat *C) 79*2d09714cSHong Zhang { 80d50806bdSBarry Smith int ierr; 81*2d09714cSHong Zhang 82d50806bdSBarry Smith PetscFunctionBegin; 834482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 84c9780b6fSBarry Smith PetscValidType(A,1); 855c66b693SKris Buschelman MatPreallocated(A); 865c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 875c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 88d50806bdSBarry Smith 894482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 90c9780b6fSBarry Smith PetscValidType(B,2); 915c66b693SKris Buschelman MatPreallocated(B); 925c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 935c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 94d50806bdSBarry Smith 954482741eSBarry Smith PetscValidPointer(C,3); 964482741eSBarry Smith 975c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 98d50806bdSBarry Smith 996284ec50SHong Zhang ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 1006284ec50SHong Zhang ierr = (*A->ops->matmult)(A,B,C);CHKERRQ(ierr); 1016284ec50SHong Zhang ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 1024d3841fdSKris Buschelman 1036284ec50SHong Zhang PetscFunctionReturn(0); 1046284ec50SHong Zhang } 1056284ec50SHong Zhang 1066284ec50SHong Zhang #undef __FUNCT__ 1076284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 108*2d09714cSHong Zhang int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B, Mat *C) 109*2d09714cSHong Zhang { 110*2d09714cSHong Zhang Mat *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,*cseq,C_seq,C_mpi; 111*2d09714cSHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 112*2d09714cSHong Zhang Mat_SeqAIJ *c; 113*2d09714cSHong Zhang MatScalar *ca; 114*2d09714cSHong Zhang int ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap,*ci,*cj,grow,*d_nnz,*o_nnz; 115*2d09714cSHong Zhang IS isrow,iscol; 1166284ec50SHong Zhang 1176284ec50SHong Zhang PetscFunctionBegin; 118*2d09714cSHong Zhang /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 119*2d09714cSHong Zhang start = a->cstart; 120*2d09714cSHong Zhang cmap = a->garray; 121*2d09714cSHong Zhang nzA = a->A->n; 122*2d09714cSHong Zhang nzB = a->B->n; 123*2d09714cSHong Zhang ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 124*2d09714cSHong Zhang ncols = 0; 125*2d09714cSHong Zhang for (i=0; i<nzB; i++) { 126*2d09714cSHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 127*2d09714cSHong Zhang else break; 128*2d09714cSHong Zhang } 129*2d09714cSHong Zhang imark = i; 130*2d09714cSHong Zhang for (i=0; i<nzA; i++) idx[ncols++] = start + i; 131*2d09714cSHong Zhang for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 132*2d09714cSHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */ 133*2d09714cSHong Zhang ierr = PetscFree(idx);CHKERRQ(ierr); 134*2d09714cSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr); 135*2d09714cSHong Zhang ierr = MatGetSubMatrices(B,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&bseq);CHKERRQ(ierr); 136*2d09714cSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 137*2d09714cSHong Zhang B_seq = bseq[0]; 1386284ec50SHong Zhang 139*2d09714cSHong Zhang /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 140*2d09714cSHong Zhang start = a->rstart; end = a->rend; 141*2d09714cSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */ 142*2d09714cSHong Zhang ierr = MatGetSubMatrices(A,1,&iscol,&isrow,MAT_INITIAL_MATRIX,&aseq);CHKERRQ(ierr); 143*2d09714cSHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 144*2d09714cSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 145*2d09714cSHong Zhang A_seq = aseq[0]; 146*2d09714cSHong Zhang 147*2d09714cSHong Zhang /* compute C_seq = A_seq * B_seq */ 148*2d09714cSHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq, B_seq, &C_seq);CHKERRQ(ierr); 149*2d09714cSHong Zhang /* 150*2d09714cSHong Zhang int rank; 151*2d09714cSHong Zhang ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 152*2d09714cSHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n); 153*2d09714cSHong Zhang */ 154*2d09714cSHong Zhang ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr); 155*2d09714cSHong Zhang ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr); 156*2d09714cSHong Zhang 157*2d09714cSHong Zhang /* create a mpi matrix C_mpi that has C_seq as its local entries */ 158*2d09714cSHong Zhang ierr = MatCreate(A->comm,C_seq->m,PETSC_DECIDE,PETSC_DECIDE,C_seq->N,&C_mpi);CHKERRQ(ierr); 159*2d09714cSHong Zhang ierr = MatSetType(C_mpi,MATMPIAIJ);CHKERRQ(ierr); 160*2d09714cSHong Zhang 161*2d09714cSHong Zhang c = (Mat_SeqAIJ*)C_seq->data; 162*2d09714cSHong Zhang ci = c->i; cj = c->j; ca = c->a; 163*2d09714cSHong Zhang ierr = PetscMalloc((2*C_seq->m+1)*sizeof(int),&d_nnz);CHKERRQ(ierr); 164*2d09714cSHong Zhang o_nnz = d_nnz + C_seq->m; 165*2d09714cSHong Zhang nzA = end-start; /* max nonezero cols of the local diagonal part of C_mpi */ 166*2d09714cSHong Zhang nzB = C_seq->n-nzA; /* max nonezero cols of the local off-diagonal part of C_mpi */ 167*2d09714cSHong Zhang for (i=0; i< C_seq->m; i++){ 168*2d09714cSHong Zhang ncols = ci[i+1] - ci[i]; 169*2d09714cSHong Zhang d_nnz[i] = PetscMin(ncols,nzA); 170*2d09714cSHong Zhang o_nnz[i] = PetscMin(ncols,nzB); 171*2d09714cSHong Zhang } 172*2d09714cSHong Zhang ierr = MatMPIAIJSetPreallocation(C_mpi,PETSC_DECIDE,d_nnz,PETSC_DECIDE,o_nnz);CHKERRQ(ierr); 173*2d09714cSHong Zhang ierr = PetscFree(d_nnz);CHKERRQ(ierr); 174*2d09714cSHong Zhang 175*2d09714cSHong Zhang /* set row values of C_mpi */ 176*2d09714cSHong Zhang for (i=0; i< C_seq->m; i++){ 177*2d09714cSHong Zhang grow = start + i; 178*2d09714cSHong Zhang ncols = ci[i+1] - ci[i]; 179*2d09714cSHong Zhang ierr = MatSetValues(C_mpi,1,&grow,ncols,cj+ci[i],ca+ci[i],INSERT_VALUES);CHKERRQ(ierr); 180*2d09714cSHong Zhang } 181*2d09714cSHong Zhang ierr = MatAssemblyBegin(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182*2d09714cSHong Zhang ierr = MatAssemblyEnd(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 183*2d09714cSHong Zhang ierr = MatDestroy(C_seq);CHKERRQ(ierr); 184*2d09714cSHong Zhang 185*2d09714cSHong Zhang *C = C_mpi; 186d50806bdSBarry Smith PetscFunctionReturn(0); 187d50806bdSBarry Smith } 1885c66b693SKris Buschelman 1895c66b693SKris Buschelman #undef __FUNCT__ 1905c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 1915c66b693SKris Buschelman int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) { 1925c66b693SKris Buschelman int ierr; 1934d3841fdSKris Buschelman char symfunct[80],numfunct[80]; 1945c66b693SKris Buschelman int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat); 1955c66b693SKris Buschelman 1965c66b693SKris Buschelman PetscFunctionBegin; 1974d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 1984d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 1994d3841fdSKris Buschelman ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 2004d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2014d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 2025c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2034d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 2044d3841fdSKris Buschelman 2054d3841fdSKris Buschelman ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 2065c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 2074d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 2084d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 2094d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 2104d3841fdSKris Buschelman 2115c66b693SKris Buschelman ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 2125c66b693SKris Buschelman ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 2135c66b693SKris Buschelman ierr = (*numeric)(A,B,*C);CHKERRQ(ierr); 2145c66b693SKris Buschelman ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 2155c66b693SKris Buschelman PetscFunctionReturn(0); 2165c66b693SKris Buschelman } 2175c66b693SKris Buschelman 218c1f4806aSKris Buschelman #undef __FUNCT__ 219c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 2205c66b693SKris Buschelman /*@ 2215c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 2225c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 2235c66b693SKris Buschelman 2245c66b693SKris Buschelman Collective on Mat 2255c66b693SKris Buschelman 2265c66b693SKris Buschelman Input Parameters: 2275c66b693SKris Buschelman + A - the left matrix 2285c66b693SKris Buschelman - B - the right matrix 2295c66b693SKris Buschelman 2305c66b693SKris Buschelman Output Parameters: 2315c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 2325c66b693SKris Buschelman 2335c66b693SKris Buschelman Notes: 2344d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 2355c66b693SKris Buschelman 2364d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 2375c66b693SKris Buschelman 2385c66b693SKris Buschelman Level: intermediate 2395c66b693SKris Buschelman 2405c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 2415c66b693SKris Buschelman @*/ 2425c66b693SKris Buschelman int MatMatMultSymbolic(Mat A,Mat B,Mat *C) { 2435c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 2445c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 2455c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */ 2465c66b693SKris Buschelman int ierr; 2474d3841fdSKris Buschelman char symfunct[80]; 2485c66b693SKris Buschelman int (*symbolic)(Mat,Mat,Mat *); 2495c66b693SKris Buschelman 2505c66b693SKris Buschelman PetscFunctionBegin; 2514482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 252c9780b6fSBarry Smith PetscValidType(A,1); 2535c66b693SKris Buschelman MatPreallocated(A); 2545c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 2555c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2565c66b693SKris Buschelman 2574482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 258c9780b6fSBarry Smith PetscValidType(B,2); 2595c66b693SKris Buschelman MatPreallocated(B); 2605c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 2615c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2624482741eSBarry Smith PetscValidPointer(C,3); 2634482741eSBarry Smith 2645c66b693SKris Buschelman 2655c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 2665c66b693SKris Buschelman 2674d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 2684d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 2694d3841fdSKris Buschelman ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 2704d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2714d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 2724d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2734d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 2745c66b693SKris Buschelman ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 2755c66b693SKris Buschelman 2765c66b693SKris Buschelman PetscFunctionReturn(0); 2775c66b693SKris Buschelman } 27858c24d83SHong Zhang 27958c24d83SHong Zhang #undef __FUNCT__ 28058c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 28158c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 28258c24d83SHong Zhang { 28358c24d83SHong Zhang int ierr; 28458c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 28558c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 28658c24d83SHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 2871c239cc6SHong Zhang int *ci,*cj,*lnk,idx0,idx; 2885c66b693SKris Buschelman int am=A->M,bn=B->N,bm=B->M; 289bf812060SBarry Smith int i,j,anzi,brow,bnzj,cnzi,nlnk; 29058c24d83SHong Zhang MatScalar *ca; 29158c24d83SHong Zhang 29258c24d83SHong Zhang PetscFunctionBegin; 2935c66b693SKris Buschelman /* Start timers */ 29458c24d83SHong Zhang ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 29558c24d83SHong Zhang /* Set up */ 29658c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 29758c24d83SHong Zhang /* free space for accumulating nonzero column info */ 29858c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 29958c24d83SHong Zhang ci[0] = 0; 30058c24d83SHong Zhang 30158c24d83SHong Zhang ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 30258c24d83SHong Zhang for (i=0; i<bn; i++) lnk[i] = -1; 30358c24d83SHong Zhang 30458c24d83SHong Zhang /* Initial FreeSpace size is nnz(B)=4*bi[bm] */ 30558c24d83SHong Zhang ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr); 30658c24d83SHong Zhang current_space = free_space; 30758c24d83SHong Zhang 30858c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 30958c24d83SHong Zhang for (i=0;i<am;i++) { 31058c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 31158c24d83SHong Zhang cnzi = 0; 31258c24d83SHong Zhang lnk[bn] = bn; 313*2d09714cSHong Zhang j = anzi; 314*2d09714cSHong Zhang aj = a->j + ai[i]; 315*2d09714cSHong Zhang while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 316*2d09714cSHong Zhang j--; 317*2d09714cSHong Zhang brow = *(aj + j); 31858c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 31958c24d83SHong Zhang bjj = bj + bi[brow]; 3201c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 3211c239cc6SHong Zhang ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr); 3221c239cc6SHong Zhang cnzi += nlnk; 32358c24d83SHong Zhang } 32458c24d83SHong Zhang 32558c24d83SHong Zhang /* If free space is not available, make more free space */ 32658c24d83SHong Zhang /* Double the amount of total space in the list */ 32758c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 32858c24d83SHong Zhang printf("...%d -th row, double space ...\n",i); 32958c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 33058c24d83SHong Zhang } 33158c24d83SHong Zhang 33258c24d83SHong Zhang /* Copy data into free space, and zero out denserow and lnk */ 33358c24d83SHong Zhang idx = bn; 33458c24d83SHong Zhang for (j=0; j<cnzi; j++){ 33558c24d83SHong Zhang idx0 = idx; 33658c24d83SHong Zhang idx = lnk[idx0]; 33758c24d83SHong Zhang *current_space->array++ = idx; 33858c24d83SHong Zhang lnk[idx0] = -1; 33958c24d83SHong Zhang } 34058c24d83SHong Zhang lnk[idx] = -1; 34158c24d83SHong Zhang 34258c24d83SHong Zhang current_space->local_used += cnzi; 34358c24d83SHong Zhang current_space->local_remaining -= cnzi; 34458c24d83SHong Zhang 34558c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 34658c24d83SHong Zhang } 34758c24d83SHong Zhang 34858c24d83SHong Zhang /* Column indices are in the list of free space */ 34958c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 35058c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 35158c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 35258c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 35358c24d83SHong Zhang ierr = PetscFree(lnk);CHKERRQ(ierr); 35458c24d83SHong Zhang 35558c24d83SHong Zhang /* Allocate space for ca */ 35658c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 35758c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 35858c24d83SHong Zhang 35958c24d83SHong Zhang /* put together the new matrix */ 36058c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 36158c24d83SHong Zhang 36258c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 36358c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 36458c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 36558c24d83SHong Zhang c->freedata = PETSC_TRUE; 36658c24d83SHong Zhang c->nonew = 0; 36758c24d83SHong Zhang 36858c24d83SHong Zhang ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 36958c24d83SHong Zhang PetscFunctionReturn(0); 37058c24d83SHong Zhang } 371d50806bdSBarry Smith 372c1f4806aSKris Buschelman #undef __FUNCT__ 373c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 3745c66b693SKris Buschelman /*@ 3755c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 3765c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 3775c66b693SKris Buschelman 3785c66b693SKris Buschelman Collective on Mat 3795c66b693SKris Buschelman 3805c66b693SKris Buschelman Input Parameters: 3815c66b693SKris Buschelman + A - the left matrix 3825c66b693SKris Buschelman - B - the right matrix 3835c66b693SKris Buschelman 3845c66b693SKris Buschelman Output Parameters: 3855c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 3865c66b693SKris Buschelman 3875c66b693SKris Buschelman Notes: 3885c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 3895c66b693SKris Buschelman 3905c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 3915c66b693SKris Buschelman 3925c66b693SKris Buschelman Level: intermediate 3935c66b693SKris Buschelman 3945c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 3955c66b693SKris Buschelman @*/ 3965c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){ 3975c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 3985c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 3995c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */ 4005c66b693SKris Buschelman int ierr; 4014d3841fdSKris Buschelman char numfunct[80]; 4025c66b693SKris Buschelman int (*numeric)(Mat,Mat,Mat); 4035c66b693SKris Buschelman 4045c66b693SKris Buschelman PetscFunctionBegin; 4055c66b693SKris Buschelman 4064482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 407c9780b6fSBarry Smith PetscValidType(A,1); 4085c66b693SKris Buschelman MatPreallocated(A); 4095c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4105c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4115c66b693SKris Buschelman 4124482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 413c9780b6fSBarry Smith PetscValidType(B,2); 4145c66b693SKris Buschelman MatPreallocated(B); 4155c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4165c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4175c66b693SKris Buschelman 4184482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 419c9780b6fSBarry Smith PetscValidType(C,3); 4205c66b693SKris Buschelman MatPreallocated(C); 4215c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4225c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4235c66b693SKris Buschelman 4245c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 4255c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 4265c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 4275c66b693SKris Buschelman 4284d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 4294d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 4304d3841fdSKris Buschelman ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 4314d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 4324d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 4334d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 4344d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 4354d3841fdSKris Buschelman 4365c66b693SKris Buschelman ierr = (*numeric)(A,B,C);CHKERRQ(ierr); 4375c66b693SKris Buschelman 4385c66b693SKris Buschelman PetscFunctionReturn(0); 4395c66b693SKris Buschelman } 4405c66b693SKris Buschelman 441d50806bdSBarry Smith #undef __FUNCT__ 44294e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 44394e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 444d50806bdSBarry Smith { 44594e3eecaSKris Buschelman int ierr,flops=0; 446d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 447d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 448d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 449d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 4505c66b693SKris Buschelman int am=A->M,cn=C->N; 45194e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 452d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 453d50806bdSBarry Smith 454d50806bdSBarry Smith PetscFunctionBegin; 455d50806bdSBarry Smith 4565c66b693SKris Buschelman /* Start timers */ 457d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 45894e3eecaSKris Buschelman 459d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 460d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 461d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 462d50806bdSBarry Smith /* Traverse A row-wise. */ 463d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 464d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 465d50806bdSBarry Smith for (i=0;i<am;i++) { 466d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 467d50806bdSBarry Smith for (j=0;j<anzi;j++) { 468d50806bdSBarry Smith brow = *aj++; 469d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 470d50806bdSBarry Smith bjj = bj + bi[brow]; 471d50806bdSBarry Smith baj = ba + bi[brow]; 472d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 473d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 474d50806bdSBarry Smith } 475d50806bdSBarry Smith flops += 2*bnzi; 476d50806bdSBarry Smith aa++; 477d50806bdSBarry Smith } 478d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 479d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 480d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 481d50806bdSBarry Smith ca[j] = temp[cj[j]]; 482d50806bdSBarry Smith temp[cj[j]] = 0.0; 483d50806bdSBarry Smith } 484d50806bdSBarry Smith ca += cnzi; 485d50806bdSBarry Smith cj += cnzi; 486d50806bdSBarry Smith } 487716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 488716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 489716bacf3SKris Buschelman 490d50806bdSBarry Smith /* Free temp */ 491d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 492d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 493d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 494d50806bdSBarry Smith PetscFunctionReturn(0); 495d50806bdSBarry Smith } 496d50806bdSBarry Smith 497d50806bdSBarry Smith #undef __FUNCT__ 4985c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private" 4995c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) { 500d50806bdSBarry Smith int ierr; 501d50806bdSBarry Smith 502d50806bdSBarry Smith PetscFunctionBegin; 5036284ec50SHong Zhang #ifdef OLD 5042216b3a4SKris Buschelman if (!logkey_matmatmult) { 5052216b3a4SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 5062216b3a4SKris Buschelman } 5075c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij", 5085c66b693SKris Buschelman "MatMatMult_SeqAIJ_SeqAIJ", 5095c66b693SKris Buschelman MatMatMult_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 5106284ec50SHong Zhang #endif 5115c66b693SKris Buschelman if (!logkey_matmatmult_symbolic) { 5125c66b693SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 513d50806bdSBarry Smith } 5145c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij", 5155c66b693SKris Buschelman "MatMatMult_Symbolic_SeqAIJ_SeqAIJ", 5165c66b693SKris Buschelman MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 5175c66b693SKris Buschelman if (!logkey_matmatmult_numeric) { 5185c66b693SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 51994e3eecaSKris Buschelman } 5205c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij", 5215c66b693SKris Buschelman "MatMatMult_Numeric_SeqAIJ_SeqAIJ", 5225c66b693SKris Buschelman MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 52394e3eecaSKris Buschelman PetscFunctionReturn(0); 52494e3eecaSKris Buschelman } 525