1 /* 2 Defines matrix-matrix product routines for pairs of AIJ matrices 3 C = A * B 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatMatMult" 13 /*@ 14 MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the left matrix 20 . B - the right matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of AIJ matrices and classes 31 which inherit from AIJ. C will be of type MATAIJ. 32 33 Level: intermediate 34 35 .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 36 @*/ 37 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 41 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 50 PetscValidType(B,2); 51 MatPreallocated(B); 52 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 56 57 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 58 59 /* For now, we do not dispatch based on the type of A and B */ 60 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 61 fA = A->ops->matmult; 62 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 63 fB = B->ops->matmult; 64 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 65 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 66 67 ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 68 ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 69 ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 70 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 76 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 77 { 78 PetscErrorCode ierr; 79 80 PetscFunctionBegin; 81 if (scall == MAT_INITIAL_MATRIX){ 82 ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 83 } else if (scall == MAT_REUSE_MATRIX){ 84 ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 85 } else { 86 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 93 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 94 { 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 if (scall == MAT_INITIAL_MATRIX){ 99 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 100 } 101 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatMatMultSymbolic" 107 /*@ 108 MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 109 of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 110 111 Collective on Mat 112 113 Input Parameters: 114 + A - the left matrix 115 . B - the right matrix 116 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 117 118 Output Parameters: 119 . C - the matrix containing the ij structure of product matrix 120 121 Notes: 122 C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 123 124 This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 125 126 Level: intermediate 127 128 .seealso: MatMatMult(),MatMatMultNumeric() 129 @*/ 130 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) 131 { 132 PetscErrorCode ierr; 133 PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 134 PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 135 136 PetscFunctionBegin; 137 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 138 PetscValidType(A,1); 139 MatPreallocated(A); 140 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 141 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 142 143 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 144 PetscValidType(B,2); 145 MatPreallocated(B); 146 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 147 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 148 PetscValidPointer(C,3); 149 150 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 151 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 152 153 /* For now, we do not dispatch based on the type of A and P */ 154 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 155 Asymbolic = A->ops->matmultsymbolic; 156 if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 157 Bsymbolic = B->ops->matmultsymbolic; 158 if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 159 if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 160 161 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 162 ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 163 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 164 165 PetscFunctionReturn(0); 166 } 167 168 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 171 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) 172 { 173 PetscErrorCode ierr; 174 Mat_MatMatMultMPI *mult; 175 PetscObjectContainer container; 176 177 PetscFunctionBegin; 178 ierr = PetscObjectQuery((PetscObject)A,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr); 179 if (container) { 180 ierr = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr); 181 ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr); 182 ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr); 183 ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr); 184 ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr); 185 ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr); 186 ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr); 187 188 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 189 ierr = PetscObjectCompose((PetscObject)A,"MatMatMultMPI",0);CHKERRQ(ierr); 190 } 191 ierr = PetscFree(mult);CHKERRQ(ierr); 192 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 193 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 199 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 200 { 201 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 202 PetscErrorCode ierr; 203 PetscInt start,end; 204 Mat_MatMatMultMPI *mult; 205 PetscObjectContainer container; 206 207 PetscFunctionBegin; 208 if (a->cstart != b->rstart || a->cend != b->rend){ 209 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 210 } 211 ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 212 213 /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 214 ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr); 215 216 /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 217 start = a->rstart; end = a->rend; 218 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr); 219 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr); 220 221 /* compute C_seq = A_seq * B_seq */ 222 ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr); 223 224 /* create mpi matrix C by concatinating C_seq */ 225 ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */ 226 ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 227 228 /* attach the supporting struct to C for reuse of symbolic C */ 229 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 230 ierr = PetscObjectContainerSetPointer(container,mult);CHKERRQ(ierr); 231 ierr = PetscObjectCompose((PetscObject)(*C),"MatMatMultMPI",(PetscObject)container);CHKERRQ(ierr); 232 233 (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 240 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 241 { 242 PetscErrorCode ierr; 243 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 244 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 245 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 246 PetscInt am=A->M,bn=B->N,bm=B->M; 247 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 248 MatScalar *ca; 249 PetscBT lnkbt; 250 251 PetscFunctionBegin; 252 /* Set up */ 253 /* Allocate ci array, arrays for fill computation and */ 254 /* free space for accumulating nonzero column info */ 255 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 256 ci[0] = 0; 257 258 /* create and initialize a linked list */ 259 nlnk = bn+1; 260 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 261 262 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 263 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 264 current_space = free_space; 265 266 /* Determine symbolic info for each row of the product: */ 267 for (i=0;i<am;i++) { 268 anzi = ai[i+1] - ai[i]; 269 cnzi = 0; 270 j = anzi; 271 aj = a->j + ai[i]; 272 while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 273 j--; 274 brow = *(aj + j); 275 bnzj = bi[brow+1] - bi[brow]; 276 bjj = bj + bi[brow]; 277 /* add non-zero cols of B into the sorted linked list lnk */ 278 ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 279 cnzi += nlnk; 280 } 281 282 /* If free space is not available, make more free space */ 283 /* Double the amount of total space in the list */ 284 if (current_space->local_remaining<cnzi) { 285 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 286 nspacedouble++; 287 } 288 289 /* Copy data into free space, then initialize lnk */ 290 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 291 current_space->array += cnzi; 292 current_space->local_used += cnzi; 293 current_space->local_remaining -= cnzi; 294 295 ci[i+1] = ci[i] + cnzi; 296 } 297 298 /* Column indices are in the list of free space */ 299 /* Allocate space for cj, initialize cj, and */ 300 /* destroy list of free space and other temporary array(s) */ 301 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 302 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 303 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 304 305 /* Allocate space for ca */ 306 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 307 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 308 309 /* put together the new symbolic matrix */ 310 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 311 312 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 313 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 314 c = (Mat_SeqAIJ *)((*C)->data); 315 c->freedata = PETSC_TRUE; 316 c->nonew = 0; 317 318 if (nspacedouble){ 319 PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatMatMultNumeric" 326 /*@ 327 MatMatMultNumeric - Performs the numeric matrix-matrix product. 328 Call this routine after first calling MatMatMultSymbolic(). 329 330 Collective on Mat 331 332 Input Parameters: 333 + A - the left matrix 334 - B - the right matrix 335 336 Output Parameters: 337 . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 338 339 Notes: 340 C must have been created with MatMatMultSymbolic. 341 342 This routine is currently only implemented for SeqAIJ type matrices. 343 344 Level: intermediate 345 346 .seealso: MatMatMult(),MatMatMultSymbolic() 347 @*/ 348 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C) 349 { 350 PetscErrorCode ierr; 351 PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 352 PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 353 354 PetscFunctionBegin; 355 356 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 357 PetscValidType(A,1); 358 MatPreallocated(A); 359 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 360 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 361 362 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 363 PetscValidType(B,2); 364 MatPreallocated(B); 365 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 366 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 367 368 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 369 PetscValidType(C,3); 370 MatPreallocated(C); 371 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 372 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 373 374 if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 375 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 376 if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 377 378 /* For now, we do not dispatch based on the type of A and B */ 379 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 380 Anumeric = A->ops->matmultnumeric; 381 if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 382 Bnumeric = B->ops->matmultnumeric; 383 if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 384 if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 385 386 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 387 ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 388 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 389 390 PetscFunctionReturn(0); 391 } 392 393 /* This routine is called ONLY in the case of reusing previously computed symbolic C */ 394 #undef __FUNCT__ 395 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 396 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 397 { 398 PetscErrorCode ierr; 399 Mat *seq; 400 Mat_MatMatMultMPI *mult; 401 PetscObjectContainer container; 402 403 PetscFunctionBegin; 404 ierr = PetscObjectQuery((PetscObject)C,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr); 405 if (container) { 406 ierr = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr); 407 } 408 409 seq = &mult->B_seq; 410 ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 411 mult->B_seq = *seq; 412 413 seq = &mult->A_loc; 414 ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 415 mult->A_loc = *seq; 416 417 ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr); 418 419 ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); 420 ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 421 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 427 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 428 { 429 PetscErrorCode ierr; 430 PetscInt flops=0; 431 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 432 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 433 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 434 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 435 PetscInt am=A->M,cn=C->N; 436 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 437 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 438 439 PetscFunctionBegin; 440 441 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 442 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 443 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 444 /* Traverse A row-wise. */ 445 /* Build the ith row in C by summing over nonzero columns in A, */ 446 /* the rows of B corresponding to nonzeros of A. */ 447 for (i=0;i<am;i++) { 448 anzi = ai[i+1] - ai[i]; 449 for (j=0;j<anzi;j++) { 450 brow = *aj++; 451 bnzi = bi[brow+1] - bi[brow]; 452 bjj = bj + bi[brow]; 453 baj = ba + bi[brow]; 454 for (k=0;k<bnzi;k++) { 455 temp[bjj[k]] += (*aa)*baj[k]; 456 } 457 flops += 2*bnzi; 458 aa++; 459 } 460 /* Store row back into C, and re-zero temp */ 461 cnzi = ci[i+1] - ci[i]; 462 for (j=0;j<cnzi;j++) { 463 ca[j] = temp[cj[j]]; 464 temp[cj[j]] = 0.0; 465 } 466 ca += cnzi; 467 cj += cnzi; 468 } 469 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 471 472 /* Free temp */ 473 ierr = PetscFree(temp);CHKERRQ(ierr); 474 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatMatMultTranspose" 480 /*@ 481 MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 482 483 Collective on Mat 484 485 Input Parameters: 486 + A - the left matrix 487 . B - the right matrix 488 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 489 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 490 491 Output Parameters: 492 . C - the product matrix 493 494 Notes: 495 C will be created and must be destroyed by the user with MatDestroy(). 496 497 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 498 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 499 500 Level: intermediate 501 502 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 503 @*/ 504 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 505 { 506 PetscErrorCode ierr; 507 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 508 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 509 510 PetscFunctionBegin; 511 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 512 PetscValidType(A,1); 513 MatPreallocated(A); 514 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 515 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 516 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 517 PetscValidType(B,2); 518 MatPreallocated(B); 519 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 520 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 521 PetscValidPointer(C,3); 522 if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M); 523 524 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 525 526 fA = A->ops->matmulttranspose; 527 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 528 fB = B->ops->matmulttranspose; 529 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 530 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 531 532 ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 533 ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 534 ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 535 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 541 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 542 PetscErrorCode ierr; 543 544 PetscFunctionBegin; 545 if (scall == MAT_INITIAL_MATRIX){ 546 ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 547 } 548 ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 554 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 555 { 556 PetscErrorCode ierr; 557 Mat At; 558 PetscInt *ati,*atj; 559 560 PetscFunctionBegin; 561 /* create symbolic At */ 562 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 563 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 564 565 /* get symbolic C=At*B */ 566 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 567 568 /* clean up */ 569 ierr = MatDestroy(At);CHKERRQ(ierr); 570 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 571 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 577 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 578 { 579 PetscErrorCode ierr; 580 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 581 PetscInt am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 582 PetscInt cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 583 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 584 585 PetscFunctionBegin; 586 /* clear old values in C */ 587 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 588 589 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 590 for (i=0;i<am;i++) { 591 bj = b->j + bi[i]; 592 ba = b->a + bi[i]; 593 bnzi = bi[i+1] - bi[i]; 594 anzi = ai[i+1] - ai[i]; 595 for (j=0; j<anzi; j++) { 596 nextb = 0; 597 crow = *aj++; 598 cjj = cj + ci[crow]; 599 caj = ca + ci[crow]; 600 /* perform sparse axpy operation. Note cjj includes bj. */ 601 for (k=0; nextb<bnzi; k++) { 602 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 603 caj[k] += (*aa)*(*(ba+nextb)); 604 nextb++; 605 } 606 } 607 flops += 2*bnzi; 608 aa++; 609 } 610 } 611 612 /* Assemble the final matrix and clean up */ 613 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 614 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 615 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618