1 /* 2 Defines matrix-matrix product routines for pairs of AIJ matrices 3 C = A * B 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatMatMult" 13 /*@ 14 MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the left matrix 20 . B - the right matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of AIJ matrices and classes 31 which inherit from AIJ. C will be of type MATAIJ. 32 33 Level: intermediate 34 35 .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 36 @*/ 37 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 41 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 50 PetscValidType(B,2); 51 MatPreallocated(B); 52 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 56 57 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 58 59 /* For now, we do not dispatch based on the type of A and B */ 60 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 61 fA = A->ops->matmult; 62 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 63 fB = B->ops->matmult; 64 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 65 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 66 67 ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 68 ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 69 ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 70 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 76 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 77 { 78 PetscErrorCode ierr; 79 80 PetscFunctionBegin; 81 if (scall == MAT_INITIAL_MATRIX){ 82 ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 83 } else if (scall == MAT_REUSE_MATRIX){ 84 ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 85 } else { 86 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 93 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 94 PetscErrorCode ierr; 95 96 PetscFunctionBegin; 97 if (scall == MAT_INITIAL_MATRIX){ 98 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 99 } 100 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatMatMultSymbolic" 106 /*@ 107 MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 108 of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 109 110 Collective on Mat 111 112 Input Parameters: 113 + A - the left matrix 114 . B - the right matrix 115 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 116 117 Output Parameters: 118 . C - the matrix containing the ij structure of product matrix 119 120 Notes: 121 C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 122 123 This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 124 125 Level: intermediate 126 127 .seealso: MatMatMult(),MatMatMultNumeric() 128 @*/ 129 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) { 130 PetscErrorCode ierr; 131 PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 132 PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 133 134 PetscFunctionBegin; 135 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 136 PetscValidType(A,1); 137 MatPreallocated(A); 138 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 139 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 140 141 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 142 PetscValidType(B,2); 143 MatPreallocated(B); 144 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 145 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 146 PetscValidPointer(C,3); 147 148 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 149 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 150 151 /* For now, we do not dispatch based on the type of A and P */ 152 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 153 Asymbolic = A->ops->matmultsymbolic; 154 if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 155 Bsymbolic = B->ops->matmultsymbolic; 156 if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 157 if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 158 159 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 160 ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 161 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 162 163 PetscFunctionReturn(0); 164 } 165 166 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 169 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) 170 { 171 PetscErrorCode ierr; 172 Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr; 173 174 PetscFunctionBegin; 175 ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr); 176 ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr); 177 ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr); 178 ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr); 179 ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr); 180 /* 181 ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr); 182 ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr); 183 */ 184 ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr); 185 ierr = PetscFree(mult);CHKERRQ(ierr); 186 187 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 188 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 194 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 195 { 196 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 197 PetscErrorCode ierr; 198 int *idx,i,start,end,ncols,nzA,nzB,*cmap; 199 Mat_MatMatMultMPI *mult; 200 201 PetscFunctionBegin; 202 if (a->cstart != b->rstart || a->cend != b->rend){ 203 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 204 } 205 ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 206 207 /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 208 ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr); 209 210 /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 211 start = a->rstart; end = a->rend; 212 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr); 213 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr); 214 215 /* compute C_seq = A_seq * B_seq */ 216 ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr); 217 218 /* create mpi matrix C by concatinating C_seq */ 219 ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */ 220 ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 221 222 /* attach the supporting struct to C for reuse of symbolic C */ 223 (*C)->spptr = (void*)mult; 224 (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 225 226 PetscFunctionReturn(0); 227 } 228 229 #undef __FUNCT__ 230 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 231 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 232 { 233 PetscErrorCode ierr; 234 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 235 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 236 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 237 int am=A->M,bn=B->N,bm=B->M; 238 int i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 239 MatScalar *ca; 240 PetscBT lnkbt; 241 242 PetscFunctionBegin; 243 /* Set up */ 244 /* Allocate ci array, arrays for fill computation and */ 245 /* free space for accumulating nonzero column info */ 246 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 247 ci[0] = 0; 248 249 /* create and initialize a linked list */ 250 nlnk = bn+1; 251 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 252 253 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 254 ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 255 current_space = free_space; 256 257 /* Determine symbolic info for each row of the product: */ 258 for (i=0;i<am;i++) { 259 anzi = ai[i+1] - ai[i]; 260 cnzi = 0; 261 j = anzi; 262 aj = a->j + ai[i]; 263 while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 264 j--; 265 brow = *(aj + j); 266 bnzj = bi[brow+1] - bi[brow]; 267 bjj = bj + bi[brow]; 268 /* add non-zero cols of B into the sorted linked list lnk */ 269 ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 270 cnzi += nlnk; 271 } 272 273 /* If free space is not available, make more free space */ 274 /* Double the amount of total space in the list */ 275 if (current_space->local_remaining<cnzi) { 276 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 277 nspacedouble++; 278 } 279 280 /* Copy data into free space, then initialize lnk */ 281 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 282 current_space->array += cnzi; 283 current_space->local_used += cnzi; 284 current_space->local_remaining -= cnzi; 285 286 ci[i+1] = ci[i] + cnzi; 287 } 288 289 /* Column indices are in the list of free space */ 290 /* Allocate space for cj, initialize cj, and */ 291 /* destroy list of free space and other temporary array(s) */ 292 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 293 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 294 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 295 296 /* Allocate space for ca */ 297 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 298 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 299 300 /* put together the new symbolic matrix */ 301 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 302 303 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 304 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 305 c = (Mat_SeqAIJ *)((*C)->data); 306 c->freedata = PETSC_TRUE; 307 c->nonew = 0; 308 309 if (nspacedouble){ 310 PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]); 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatMatMultNumeric" 317 /*@ 318 MatMatMultNumeric - Performs the numeric matrix-matrix product. 319 Call this routine after first calling MatMatMultSymbolic(). 320 321 Collective on Mat 322 323 Input Parameters: 324 + A - the left matrix 325 - B - the right matrix 326 327 Output Parameters: 328 . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 329 330 Notes: 331 C must have been created with MatMatMultSymbolic. 332 333 This routine is currently only implemented for SeqAIJ type matrices. 334 335 Level: intermediate 336 337 .seealso: MatMatMult(),MatMatMultSymbolic() 338 @*/ 339 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){ 340 PetscErrorCode ierr; 341 PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 342 PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 343 344 PetscFunctionBegin; 345 346 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 347 PetscValidType(A,1); 348 MatPreallocated(A); 349 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 350 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 351 352 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 353 PetscValidType(B,2); 354 MatPreallocated(B); 355 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 356 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 357 358 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 359 PetscValidType(C,3); 360 MatPreallocated(C); 361 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 362 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 363 364 if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 365 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 366 if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 367 368 /* For now, we do not dispatch based on the type of A and B */ 369 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 370 Anumeric = A->ops->matmultnumeric; 371 if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 372 Bnumeric = B->ops->matmultnumeric; 373 if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 374 if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 375 376 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 377 ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 378 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 379 380 PetscFunctionReturn(0); 381 } 382 383 /* This routine is called ONLY in the case of reusing previously computed symbolic C */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 386 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 387 { 388 PetscErrorCode ierr; 389 Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr; 390 Mat *seq; 391 392 PetscFunctionBegin; 393 seq = &mult->B_seq; 394 ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 395 mult->B_seq = *seq; 396 397 seq = &mult->A_loc; 398 ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 399 mult->A_loc = *seq; 400 401 ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr); 402 403 ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); 404 ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 405 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 411 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 412 { 413 PetscErrorCode ierr; 414 int flops=0; 415 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 416 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 417 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 418 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 419 int am=A->M,cn=C->N; 420 int i,j,k,anzi,bnzi,cnzi,brow; 421 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 422 423 PetscFunctionBegin; 424 425 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 426 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 427 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 428 /* Traverse A row-wise. */ 429 /* Build the ith row in C by summing over nonzero columns in A, */ 430 /* the rows of B corresponding to nonzeros of A. */ 431 for (i=0;i<am;i++) { 432 anzi = ai[i+1] - ai[i]; 433 for (j=0;j<anzi;j++) { 434 brow = *aj++; 435 bnzi = bi[brow+1] - bi[brow]; 436 bjj = bj + bi[brow]; 437 baj = ba + bi[brow]; 438 for (k=0;k<bnzi;k++) { 439 temp[bjj[k]] += (*aa)*baj[k]; 440 } 441 flops += 2*bnzi; 442 aa++; 443 } 444 /* Store row back into C, and re-zero temp */ 445 cnzi = ci[i+1] - ci[i]; 446 for (j=0;j<cnzi;j++) { 447 ca[j] = temp[cj[j]]; 448 temp[cj[j]] = 0.0; 449 } 450 ca += cnzi; 451 cj += cnzi; 452 } 453 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 454 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455 456 /* Free temp */ 457 ierr = PetscFree(temp);CHKERRQ(ierr); 458 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatMatMultTranspose" 464 /*@ 465 MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 466 467 Collective on Mat 468 469 Input Parameters: 470 + A - the left matrix 471 . B - the right matrix 472 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 473 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 474 475 Output Parameters: 476 . C - the product matrix 477 478 Notes: 479 C will be created and must be destroyed by the user with MatDestroy(). 480 481 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 482 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 483 484 Level: intermediate 485 486 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 487 @*/ 488 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 489 { 490 PetscErrorCode ierr; 491 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 492 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 493 494 PetscFunctionBegin; 495 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 496 PetscValidType(A,1); 497 MatPreallocated(A); 498 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 499 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 500 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 501 PetscValidType(B,2); 502 MatPreallocated(B); 503 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 504 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 505 PetscValidPointer(C,3); 506 if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M); 507 508 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 509 510 fA = A->ops->matmulttranspose; 511 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 512 fB = B->ops->matmulttranspose; 513 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 514 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 515 516 ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 517 ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 518 ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 519 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 525 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 526 PetscErrorCode ierr; 527 528 PetscFunctionBegin; 529 if (scall == MAT_INITIAL_MATRIX){ 530 ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 531 } 532 ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 538 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 539 { 540 PetscErrorCode ierr; 541 Mat At; 542 int *ati,*atj; 543 544 PetscFunctionBegin; 545 /* create symbolic At */ 546 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 547 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 548 549 /* get symbolic C=At*B */ 550 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 551 552 /* clean up */ 553 ierr = MatDestroy(At);CHKERRQ(ierr); 554 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 555 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 561 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 562 { 563 PetscErrorCode ierr; 564 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 565 int am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 566 int cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 567 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 568 569 PetscFunctionBegin; 570 /* clear old values in C */ 571 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 572 573 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 574 for (i=0;i<am;i++) { 575 bj = b->j + bi[i]; 576 ba = b->a + bi[i]; 577 bnzi = bi[i+1] - bi[i]; 578 anzi = ai[i+1] - ai[i]; 579 for (j=0; j<anzi; j++) { 580 nextb = 0; 581 crow = *aj++; 582 cjj = cj + ci[crow]; 583 caj = ca + ci[crow]; 584 /* perform sparse axpy operation. Note cjj includes bj. */ 585 for (k=0; nextb<bnzi; k++) { 586 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 587 caj[k] += (*aa)*(*(ba+nextb)); 588 nextb++; 589 } 590 } 591 flops += 2*bnzi; 592 aa++; 593 } 594 } 595 596 /* Assemble the final matrix and clean up */ 597 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 598 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 599 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602