1 /* 2 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 3 C = A * B 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "petscbt.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatMatMult" 12 /*@ 13 MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the left matrix 19 . B - the right matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of AIJ matrices and classes 30 which inherit from AIJ. C will be of type MATAIJ. 31 32 Level: intermediate 33 34 .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 35 @*/ 36 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 37 { 38 PetscErrorCode ierr; 39 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 40 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 41 42 PetscFunctionBegin; 43 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 44 PetscValidType(A,1); 45 MatPreallocated(A); 46 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 47 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 48 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 49 PetscValidType(B,2); 50 MatPreallocated(B); 51 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 52 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 53 PetscValidPointer(C,3); 54 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 /* For now, we do not dispatch based on the type of A and B */ 59 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60 fA = A->ops->matmult; 61 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 62 fB = B->ops->matmult; 63 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 64 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 65 66 ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 67 ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 68 ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 69 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 75 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 if (scall == MAT_INITIAL_MATRIX){ 81 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 82 } 83 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatMatMultSymbolic" 89 /*@ 90 MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 91 of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 92 93 Collective on Mat 94 95 Input Parameters: 96 + A - the left matrix 97 . B - the right matrix 98 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 99 100 Output Parameters: 101 . C - the matrix containing the ij structure of product matrix 102 103 Notes: 104 C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 105 106 This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 107 108 Level: intermediate 109 110 .seealso: MatMatMult(),MatMatMultNumeric() 111 @*/ 112 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) 113 { 114 PetscErrorCode ierr; 115 PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 116 PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 117 118 PetscFunctionBegin; 119 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 120 PetscValidType(A,1); 121 MatPreallocated(A); 122 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 123 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 124 125 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 126 PetscValidType(B,2); 127 MatPreallocated(B); 128 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 129 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 130 PetscValidPointer(C,3); 131 132 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N); 133 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 134 135 /* For now, we do not dispatch based on the type of A and P */ 136 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 137 Asymbolic = A->ops->matmultsymbolic; 138 if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 139 Bsymbolic = B->ops->matmultsymbolic; 140 if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 141 if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 142 143 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 144 ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 145 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 146 147 PetscFunctionReturn(0); 148 } 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 152 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 153 { 154 PetscErrorCode ierr; 155 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 156 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 157 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 158 PetscInt am=A->M,bn=B->N,bm=B->M; 159 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 160 MatScalar *ca; 161 PetscBT lnkbt; 162 163 PetscFunctionBegin; 164 /* Set up */ 165 /* Allocate ci array, arrays for fill computation and */ 166 /* free space for accumulating nonzero column info */ 167 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 168 ci[0] = 0; 169 170 /* create and initialize a linked list */ 171 nlnk = bn+1; 172 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 173 174 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 175 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 176 current_space = free_space; 177 178 /* Determine symbolic info for each row of the product: */ 179 for (i=0;i<am;i++) { 180 anzi = ai[i+1] - ai[i]; 181 cnzi = 0; 182 j = anzi; 183 aj = a->j + ai[i]; 184 while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 185 j--; 186 brow = *(aj + j); 187 bnzj = bi[brow+1] - bi[brow]; 188 bjj = bj + bi[brow]; 189 /* add non-zero cols of B into the sorted linked list lnk */ 190 ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 191 cnzi += nlnk; 192 } 193 194 /* If free space is not available, make more free space */ 195 /* Double the amount of total space in the list */ 196 if (current_space->local_remaining<cnzi) { 197 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 198 nspacedouble++; 199 } 200 201 /* Copy data into free space, then initialize lnk */ 202 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 203 current_space->array += cnzi; 204 current_space->local_used += cnzi; 205 current_space->local_remaining -= cnzi; 206 207 ci[i+1] = ci[i] + cnzi; 208 } 209 210 /* Column indices are in the list of free space */ 211 /* Allocate space for cj, initialize cj, and */ 212 /* destroy list of free space and other temporary array(s) */ 213 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 214 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 215 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 216 217 /* Allocate space for ca */ 218 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 219 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 220 221 /* put together the new symbolic matrix */ 222 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 223 224 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 225 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 226 c = (Mat_SeqAIJ *)((*C)->data); 227 c->freedata = PETSC_TRUE; 228 c->nonew = 0; 229 230 if (nspacedouble){ 231 PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%D, nnz(A):%D, nnz(B):%D, fill:%g, nnz(C):%D\n",nspacedouble,ai[am],bi[bm],fill,ci[am]); 232 } 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatMatMultNumeric" 238 /*@ 239 MatMatMultNumeric - Performs the numeric matrix-matrix product. 240 Call this routine after first calling MatMatMultSymbolic(). 241 242 Collective on Mat 243 244 Input Parameters: 245 + A - the left matrix 246 - B - the right matrix 247 248 Output Parameters: 249 . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 250 251 Notes: 252 C must have been created with MatMatMultSymbolic. 253 254 This routine is currently only implemented for SeqAIJ type matrices. 255 256 Level: intermediate 257 258 .seealso: MatMatMult(),MatMatMultSymbolic() 259 @*/ 260 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C) 261 { 262 PetscErrorCode ierr; 263 PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 264 PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 265 266 PetscFunctionBegin; 267 268 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 269 PetscValidType(A,1); 270 MatPreallocated(A); 271 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 272 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 273 274 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 275 PetscValidType(B,2); 276 MatPreallocated(B); 277 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 278 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 279 280 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 281 PetscValidType(C,3); 282 MatPreallocated(C); 283 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 284 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 285 286 if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->N,C->N); 287 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N); 288 if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",A->M,C->M); 289 290 /* For now, we do not dispatch based on the type of A and B */ 291 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 292 Anumeric = A->ops->matmultnumeric; 293 if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 294 Bnumeric = B->ops->matmultnumeric; 295 if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 296 if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 297 298 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 299 ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 300 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 301 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 307 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 308 { 309 PetscErrorCode ierr; 310 PetscInt flops=0; 311 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 312 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 313 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 314 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 315 PetscInt am=A->M,cm=C->M; 316 PetscInt i,j,k,anzi,bnzi,cnzi,brow,nextb; 317 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a; 318 319 PetscFunctionBegin; 320 /* clean old values in C */ 321 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 322 /* Traverse A row-wise. */ 323 /* Build the ith row in C by summing over nonzero columns in A, */ 324 /* the rows of B corresponding to nonzeros of A. */ 325 for (i=0;i<am;i++) { 326 anzi = ai[i+1] - ai[i]; 327 for (j=0;j<anzi;j++) { 328 brow = *aj++; 329 bnzi = bi[brow+1] - bi[brow]; 330 bjj = bj + bi[brow]; 331 baj = ba + bi[brow]; 332 nextb = 0; 333 for (k=0; nextb<bnzi; k++) { 334 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 335 ca[k] += (*aa)*baj[nextb++]; 336 } 337 } 338 flops += 2*bnzi; 339 aa++; 340 } 341 cnzi = ci[i+1] - ci[i]; 342 ca += cnzi; 343 cj += cnzi; 344 } 345 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 346 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 348 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatMatMultTranspose" 354 /*@ 355 MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 356 357 Collective on Mat 358 359 Input Parameters: 360 + A - the left matrix 361 . B - the right matrix 362 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 363 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 364 365 Output Parameters: 366 . C - the product matrix 367 368 Notes: 369 C will be created and must be destroyed by the user with MatDestroy(). 370 371 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 372 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 373 374 Level: intermediate 375 376 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 377 @*/ 378 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 379 { 380 PetscErrorCode ierr; 381 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 382 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 383 384 PetscFunctionBegin; 385 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 386 PetscValidType(A,1); 387 MatPreallocated(A); 388 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 389 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 390 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 391 PetscValidType(B,2); 392 MatPreallocated(B); 393 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 394 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 395 PetscValidPointer(C,3); 396 if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->M); 397 398 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 399 400 fA = A->ops->matmulttranspose; 401 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 402 fB = B->ops->matmulttranspose; 403 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 404 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 405 406 ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 407 ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 408 ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 409 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 415 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 416 PetscErrorCode ierr; 417 418 PetscFunctionBegin; 419 if (scall == MAT_INITIAL_MATRIX){ 420 ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 421 } 422 ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 428 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 429 { 430 PetscErrorCode ierr; 431 Mat At; 432 PetscInt *ati,*atj; 433 434 PetscFunctionBegin; 435 /* create symbolic At */ 436 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 437 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 438 439 /* get symbolic C=At*B */ 440 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 441 442 /* clean up */ 443 ierr = MatDestroy(At);CHKERRQ(ierr); 444 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 445 446 PetscFunctionReturn(0); 447 } 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 451 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 452 { 453 PetscErrorCode ierr; 454 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 455 PetscInt am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 456 PetscInt cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 457 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 458 459 PetscFunctionBegin; 460 /* clear old values in C */ 461 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 462 463 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 464 for (i=0;i<am;i++) { 465 bj = b->j + bi[i]; 466 ba = b->a + bi[i]; 467 bnzi = bi[i+1] - bi[i]; 468 anzi = ai[i+1] - ai[i]; 469 for (j=0; j<anzi; j++) { 470 nextb = 0; 471 crow = *aj++; 472 cjj = cj + ci[crow]; 473 caj = ca + ci[crow]; 474 /* perform sparse axpy operation. Note cjj includes bj. */ 475 for (k=0; nextb<bnzi; k++) { 476 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 477 caj[k] += (*aa)*(*(ba+nextb)); 478 nextb++; 479 } 480 } 481 flops += 2*bnzi; 482 aa++; 483 } 484 } 485 486 /* Assemble the final matrix and clean up */ 487 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 488 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 489 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492