1 /* 2 Defines matrix-matrix product routines for pairs of AIJ matrices 3 C = A * B 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatMatMult" 13 /*@ 14 MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the left matrix 20 . B - the right matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of AIJ matrices and classes 31 which inherit from AIJ. C will be of type MATAIJ. 32 33 Level: intermediate 34 35 .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 36 @*/ 37 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 41 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 50 PetscValidType(B,2); 51 MatPreallocated(B); 52 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 56 57 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 58 59 /* For now, we do not dispatch based on the type of A and B */ 60 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 61 fA = A->ops->matmult; 62 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 63 fB = B->ops->matmult; 64 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 65 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 66 67 ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 68 ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 69 ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 70 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 76 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 77 { 78 PetscErrorCode ierr; 79 80 PetscFunctionBegin; 81 if (scall == MAT_INITIAL_MATRIX){ 82 ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 83 } else if (scall == MAT_REUSE_MATRIX){ 84 ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 85 } else { 86 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 93 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 94 PetscErrorCode ierr; 95 96 PetscFunctionBegin; 97 if (scall == MAT_INITIAL_MATRIX){ 98 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 99 } 100 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatMatMultSymbolic" 106 /*@ 107 MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 108 of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 109 110 Collective on Mat 111 112 Input Parameters: 113 + A - the left matrix 114 . B - the right matrix 115 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 116 117 Output Parameters: 118 . C - the matrix containing the ij structure of product matrix 119 120 Notes: 121 C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 122 123 This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 124 125 Level: intermediate 126 127 .seealso: MatMatMult(),MatMatMultNumeric() 128 @*/ 129 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) { 130 PetscErrorCode ierr; 131 PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 132 PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 133 134 PetscFunctionBegin; 135 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 136 PetscValidType(A,1); 137 MatPreallocated(A); 138 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 139 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 140 141 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 142 PetscValidType(B,2); 143 MatPreallocated(B); 144 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 145 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 146 PetscValidPointer(C,3); 147 148 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 149 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 150 151 /* For now, we do not dispatch based on the type of A and P */ 152 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 153 Asymbolic = A->ops->matmultsymbolic; 154 if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 155 Bsymbolic = B->ops->matmultsymbolic; 156 if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 157 if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 158 159 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 160 ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 161 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 162 163 PetscFunctionReturn(0); 164 } 165 166 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 169 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) 170 { 171 PetscErrorCode ierr; 172 Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr; 173 174 PetscFunctionBegin; 175 ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr); 176 ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr); 177 ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr); 178 ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr); 179 ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr); 180 ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr); 181 ierr = PetscFree(mult);CHKERRQ(ierr); 182 183 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 184 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 190 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 191 { 192 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 193 PetscErrorCode ierr; 194 int *idx,i,start,end,ncols,nzA,nzB,*cmap; 195 Mat_MatMatMultMPI *mult; 196 197 PetscFunctionBegin; 198 if (a->cstart != b->rstart || a->cend != b->rend){ 199 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 200 } 201 ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 202 203 /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 204 start = a->cstart; 205 cmap = a->garray; 206 nzA = a->A->n; 207 nzB = a->B->n; 208 ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 209 ncols = 0; 210 for (i=0; i<nzB; i++) { 211 if (cmap[i] < start) idx[ncols++] = cmap[i]; 212 else break; 213 } 214 mult->brstart = i; 215 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 216 for (i=mult->brstart; i<nzB; i++) idx[ncols++] = cmap[i]; 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr); 218 ierr = PetscFree(idx);CHKERRQ(ierr); 219 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr); 220 ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr); 221 222 /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 223 start = a->rstart; end = a->rend; 224 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr); 225 ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr); 226 227 /* compute C_seq = A_seq * B_seq */ 228 ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr); 229 230 /* create mpi matrix C by concatinating C_seq */ 231 ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */ 232 ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 233 234 /* attach the supporting struct to C for reuse of symbolic C */ 235 (*C)->spptr = (void*)mult; 236 (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 237 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 243 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 244 { 245 PetscErrorCode ierr; 246 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 247 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 248 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 249 int am=A->M,bn=B->N,bm=B->M; 250 int i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 251 MatScalar *ca; 252 PetscBT lnkbt; 253 254 PetscFunctionBegin; 255 /* Set up */ 256 /* Allocate ci array, arrays for fill computation and */ 257 /* free space for accumulating nonzero column info */ 258 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 259 ci[0] = 0; 260 261 /* create and initialize a linked list */ 262 nlnk = bn+1; 263 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 264 265 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 266 ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 267 current_space = free_space; 268 269 /* Determine symbolic info for each row of the product: */ 270 for (i=0;i<am;i++) { 271 anzi = ai[i+1] - ai[i]; 272 cnzi = 0; 273 j = anzi; 274 aj = a->j + ai[i]; 275 while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 276 j--; 277 brow = *(aj + j); 278 bnzj = bi[brow+1] - bi[brow]; 279 bjj = bj + bi[brow]; 280 /* add non-zero cols of B into the sorted linked list lnk */ 281 ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 282 cnzi += nlnk; 283 } 284 285 /* If free space is not available, make more free space */ 286 /* Double the amount of total space in the list */ 287 if (current_space->local_remaining<cnzi) { 288 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 289 nspacedouble++; 290 } 291 292 /* Copy data into free space, then initialize lnk */ 293 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 294 current_space->array += cnzi; 295 current_space->local_used += cnzi; 296 current_space->local_remaining -= cnzi; 297 298 ci[i+1] = ci[i] + cnzi; 299 } 300 301 /* Column indices are in the list of free space */ 302 /* Allocate space for cj, initialize cj, and */ 303 /* destroy list of free space and other temporary array(s) */ 304 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 305 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 306 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 307 308 /* Allocate space for ca */ 309 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 310 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 311 312 /* put together the new symbolic matrix */ 313 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 314 315 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 316 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 317 c = (Mat_SeqAIJ *)((*C)->data); 318 c->freedata = PETSC_TRUE; 319 c->nonew = 0; 320 321 if (nspacedouble){ 322 PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]); 323 } 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatMatMultNumeric" 329 /*@ 330 MatMatMultNumeric - Performs the numeric matrix-matrix product. 331 Call this routine after first calling MatMatMultSymbolic(). 332 333 Collective on Mat 334 335 Input Parameters: 336 + A - the left matrix 337 - B - the right matrix 338 339 Output Parameters: 340 . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 341 342 Notes: 343 C must have been created with MatMatMultSymbolic. 344 345 This routine is currently only implemented for SeqAIJ type matrices. 346 347 Level: intermediate 348 349 .seealso: MatMatMult(),MatMatMultSymbolic() 350 @*/ 351 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){ 352 PetscErrorCode ierr; 353 PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 354 PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 355 356 PetscFunctionBegin; 357 358 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 359 PetscValidType(A,1); 360 MatPreallocated(A); 361 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 362 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 363 364 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 365 PetscValidType(B,2); 366 MatPreallocated(B); 367 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 368 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 369 370 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 371 PetscValidType(C,3); 372 MatPreallocated(C); 373 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 374 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 375 376 if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 377 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 378 if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 379 380 /* For now, we do not dispatch based on the type of A and B */ 381 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 382 Anumeric = A->ops->matmultnumeric; 383 if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 384 Bnumeric = B->ops->matmultnumeric; 385 if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 386 if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 387 388 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 389 ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 390 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 391 392 PetscFunctionReturn(0); 393 } 394 395 /* This routine is called ONLY in the case of reusing previously computed symbolic C */ 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 398 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 399 { 400 PetscErrorCode ierr; 401 Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr; 402 403 PetscFunctionBegin; 404 ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr) 405 ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr); 406 ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr); 407 408 ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); 409 ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 410 411 PetscFunctionReturn(0); 412 } 413 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 416 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 417 { 418 PetscErrorCode ierr; 419 int flops=0; 420 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 421 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 422 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 423 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 424 int am=A->M,cn=C->N; 425 int i,j,k,anzi,bnzi,cnzi,brow; 426 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 427 428 PetscFunctionBegin; 429 430 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 431 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 432 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 433 /* Traverse A row-wise. */ 434 /* Build the ith row in C by summing over nonzero columns in A, */ 435 /* the rows of B corresponding to nonzeros of A. */ 436 for (i=0;i<am;i++) { 437 anzi = ai[i+1] - ai[i]; 438 for (j=0;j<anzi;j++) { 439 brow = *aj++; 440 bnzi = bi[brow+1] - bi[brow]; 441 bjj = bj + bi[brow]; 442 baj = ba + bi[brow]; 443 for (k=0;k<bnzi;k++) { 444 temp[bjj[k]] += (*aa)*baj[k]; 445 } 446 flops += 2*bnzi; 447 aa++; 448 } 449 /* Store row back into C, and re-zero temp */ 450 cnzi = ci[i+1] - ci[i]; 451 for (j=0;j<cnzi;j++) { 452 ca[j] = temp[cj[j]]; 453 temp[cj[j]] = 0.0; 454 } 455 ca += cnzi; 456 cj += cnzi; 457 } 458 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 459 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 460 461 /* Free temp */ 462 ierr = PetscFree(temp);CHKERRQ(ierr); 463 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatMatMultTranspose" 469 /*@ 470 MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 471 472 Collective on Mat 473 474 Input Parameters: 475 + A - the left matrix 476 . B - the right matrix 477 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 478 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 479 480 Output Parameters: 481 . C - the product matrix 482 483 Notes: 484 C will be created and must be destroyed by the user with MatDestroy(). 485 486 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 487 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 488 489 Level: intermediate 490 491 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 492 @*/ 493 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 494 { 495 PetscErrorCode ierr; 496 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 497 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 498 499 PetscFunctionBegin; 500 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 501 PetscValidType(A,1); 502 MatPreallocated(A); 503 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 504 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 505 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 506 PetscValidType(B,2); 507 MatPreallocated(B); 508 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 509 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 510 PetscValidPointer(C,3); 511 if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M); 512 513 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 514 515 fA = A->ops->matmulttranspose; 516 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 517 fB = B->ops->matmulttranspose; 518 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 519 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 520 521 ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 522 ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 523 ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 524 525 PetscFunctionReturn(0); 526 } 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 530 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 531 PetscErrorCode ierr; 532 533 PetscFunctionBegin; 534 if (scall == MAT_INITIAL_MATRIX){ 535 ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 536 } 537 ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 543 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 544 { 545 PetscErrorCode ierr; 546 Mat At; 547 int *ati,*atj; 548 549 PetscFunctionBegin; 550 /* create symbolic At */ 551 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 552 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 553 554 /* get symbolic C=At*B */ 555 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 556 557 /* clean up */ 558 ierr = MatDestroy(At);CHKERRQ(ierr); 559 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 560 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 566 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 567 { 568 PetscErrorCode ierr; 569 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 570 int am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 571 int cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 572 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 573 574 PetscFunctionBegin; 575 /* clear old values in C */ 576 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 577 578 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 579 for (i=0;i<am;i++) { 580 bj = b->j + bi[i]; 581 ba = b->a + bi[i]; 582 bnzi = bi[i+1] - bi[i]; 583 anzi = ai[i+1] - ai[i]; 584 for (j=0; j<anzi; j++) { 585 nextb = 0; 586 crow = *aj++; 587 cjj = cj + ci[crow]; 588 caj = ca + ci[crow]; 589 /* perform sparse axpy operation. Note cjj includes bj. */ 590 for (k=0; nextb<bnzi; k++) { 591 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 592 caj[k] += (*aa)*(*(ba+nextb)); 593 nextb++; 594 } 595 } 596 flops += 2*bnzi; 597 aa++; 598 } 599 } 600 601 /* Assemble the final matrix and clean up */ 602 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 603 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 604 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607