1 /* 2 Defines matrix-matrix product routines for pairs of AIJ matrices 3 C = A * B 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatMatMult" 12 /*@ 13 MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the left matrix 19 . B - the right matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of AIJ matrices and classes 30 which inherit from AIJ. C will be of type MATAIJ. 31 32 Level: intermediate 33 34 .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 35 @*/ 36 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 37 { 38 PetscErrorCode ierr; 39 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 40 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 41 42 PetscFunctionBegin; 43 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 44 PetscValidType(A,1); 45 MatPreallocated(A); 46 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 47 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 48 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 49 PetscValidType(B,2); 50 MatPreallocated(B); 51 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 52 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 53 PetscValidPointer(C,3); 54 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 /* For now, we do not dispatch based on the type of A and B */ 59 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60 fA = A->ops->matmult; 61 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 62 fB = B->ops->matmult; 63 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 64 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 65 66 ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 67 ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 68 ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 69 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 75 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 if (scall == MAT_INITIAL_MATRIX){ 81 ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 82 } else if (scall == MAT_REUSE_MATRIX){ 83 ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 84 } else { 85 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 92 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 if (scall == MAT_INITIAL_MATRIX){ 97 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 98 } 99 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatMatMultSymbolic" 105 /*@ 106 MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 107 of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 108 109 Collective on Mat 110 111 Input Parameters: 112 + A - the left matrix 113 . B - the right matrix 114 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 115 116 Output Parameters: 117 . C - the matrix containing the ij structure of product matrix 118 119 Notes: 120 C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 121 122 This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 123 124 Level: intermediate 125 126 .seealso: MatMatMult(),MatMatMultNumeric() 127 @*/ 128 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) { 129 PetscErrorCode ierr; 130 PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 131 PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 132 133 PetscFunctionBegin; 134 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 135 PetscValidType(A,1); 136 MatPreallocated(A); 137 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 138 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 139 140 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 141 PetscValidType(B,2); 142 MatPreallocated(B); 143 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 144 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 145 PetscValidPointer(C,3); 146 147 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 148 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 149 150 /* For now, we do not dispatch based on the type of A and P */ 151 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 152 Asymbolic = A->ops->matmultsymbolic; 153 if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 154 Bsymbolic = B->ops->matmultsymbolic; 155 if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 156 if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 157 158 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 159 ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 160 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 161 162 PetscFunctionReturn(0); 163 } 164 165 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 166 #undef __FUNCT__ 167 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 168 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) 169 { 170 PetscErrorCode ierr; 171 Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr; 172 173 PetscFunctionBegin; 174 ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr); 175 ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr); 176 ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr); 177 ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr); 178 ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr); 179 ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr); 180 ierr = PetscFree(mult);CHKERRQ(ierr); 181 182 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 183 184 PetscFunctionReturn(0); 185 } 186 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 189 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 190 { 191 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 192 PetscErrorCode ierr; 193 int *idx,i,start,end,ncols,nzA,nzB,*cmap; 194 Mat_MatMatMultMPI *mult; 195 196 PetscFunctionBegin; 197 if (a->cstart != b->rstart || a->cend != b->rend){ 198 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 199 } 200 ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 201 202 /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 203 start = a->cstart; 204 cmap = a->garray; 205 nzA = a->A->n; 206 nzB = a->B->n; 207 ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 208 ncols = 0; 209 for (i=0; i<nzB; i++) { 210 if (cmap[i] < start) idx[ncols++] = cmap[i]; 211 else break; 212 } 213 mult->brstart = i; 214 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 215 for (i=mult->brstart; i<nzB; i++) idx[ncols++] = cmap[i]; 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr); 217 ierr = PetscFree(idx);CHKERRQ(ierr); 218 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr); 219 ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr); 220 221 /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 222 start = a->rstart; end = a->rend; 223 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr); 224 ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr); 225 226 /* compute C_seq = A_seq * B_seq */ 227 ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr); 228 229 /* create mpi matrix C by concatinating C_seq */ 230 ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */ 231 ierr = MatMerge(A->comm,mult->C_seq,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 232 233 /* attach the supporting struct to C for reuse of symbolic C */ 234 (*C)->spptr = (void*)mult; 235 (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 236 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 242 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 243 { 244 PetscErrorCode ierr; 245 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 246 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 247 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 248 int *ci,*cj,*lnk; 249 int am=A->M,bn=B->N,bm=B->M; 250 int i,j,anzi,brow,bnzj,cnzi,nlnk,nspacedouble=0; 251 MatScalar *ca; 252 253 PetscFunctionBegin; 254 /* Set up */ 255 /* Allocate ci array, arrays for fill computation and */ 256 /* free space for accumulating nonzero column info */ 257 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 258 ci[0] = 0; 259 260 /* create and initialize a linked list for symbolic product */ 261 ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 262 ierr = PetscLLInitialize(bn,bn);CHKERRQ(ierr); 263 264 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 265 ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 266 current_space = free_space; 267 268 /* Determine symbolic info for each row of the product: */ 269 for (i=0;i<am;i++) { 270 anzi = ai[i+1] - ai[i]; 271 cnzi = 0; 272 j = anzi; 273 aj = a->j + ai[i]; 274 while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 275 j--; 276 brow = *(aj + j); 277 bnzj = bi[brow+1] - bi[brow]; 278 bjj = bj + bi[brow]; 279 /* add non-zero cols of B into the sorted linked list lnk */ 280 ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk);CHKERRQ(ierr); 281 cnzi += nlnk; 282 } 283 284 /* If free space is not available, make more free space */ 285 /* Double the amount of total space in the list */ 286 if (current_space->local_remaining<cnzi) { 287 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 288 nspacedouble++; 289 } 290 291 /* Copy data into free space, then initialize lnk */ 292 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array);CHKERRQ(ierr); 293 current_space->array += cnzi; 294 current_space->local_used += cnzi; 295 current_space->local_remaining -= cnzi; 296 297 ci[i+1] = ci[i] + cnzi; 298 } 299 300 /* Column indices are in the list of free space */ 301 /* Allocate space for cj, initialize cj, and */ 302 /* destroy list of free space and other temporary array(s) */ 303 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 304 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 305 ierr = PetscFree(lnk);CHKERRQ(ierr); 306 307 /* Allocate space for ca */ 308 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 309 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 310 311 /* put together the new symbolic matrix */ 312 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 313 314 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 315 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 316 c = (Mat_SeqAIJ *)((*C)->data); 317 c->freedata = PETSC_TRUE; 318 c->nonew = 0; 319 320 PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble); 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatMatMultNumeric" 326 /*@ 327 MatMatMultNumeric - Performs the numeric matrix-matrix product. 328 Call this routine after first calling MatMatMultSymbolic(). 329 330 Collective on Mat 331 332 Input Parameters: 333 + A - the left matrix 334 - B - the right matrix 335 336 Output Parameters: 337 . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 338 339 Notes: 340 C must have been created with MatMatMultSymbolic. 341 342 This routine is currently only implemented for SeqAIJ type matrices. 343 344 Level: intermediate 345 346 .seealso: MatMatMult(),MatMatMultSymbolic() 347 @*/ 348 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){ 349 PetscErrorCode ierr; 350 PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 351 PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 352 353 PetscFunctionBegin; 354 355 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 356 PetscValidType(A,1); 357 MatPreallocated(A); 358 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 359 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 360 361 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 362 PetscValidType(B,2); 363 MatPreallocated(B); 364 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 365 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 366 367 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 368 PetscValidType(C,3); 369 MatPreallocated(C); 370 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 371 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 372 373 if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 374 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 375 if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 376 377 /* For now, we do not dispatch based on the type of A and B */ 378 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 379 Anumeric = A->ops->matmultnumeric; 380 if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 381 Bnumeric = B->ops->matmultnumeric; 382 if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 383 if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 384 385 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 386 ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 387 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 388 389 PetscFunctionReturn(0); 390 } 391 392 /* This routine is called ONLY in the case of reusing previously computed symbolic C */ 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 395 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 396 { 397 PetscErrorCode ierr; 398 Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr; 399 400 PetscFunctionBegin; 401 ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr) 402 ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr); 403 ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr); 404 405 ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); 406 ierr = MatMerge(A->comm,mult->C_seq,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 407 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 413 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 414 { 415 PetscErrorCode ierr; 416 int flops=0; 417 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 418 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 419 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 420 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 421 int am=A->M,cn=C->N; 422 int i,j,k,anzi,bnzi,cnzi,brow; 423 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 424 425 PetscFunctionBegin; 426 427 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 428 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 429 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 430 /* Traverse A row-wise. */ 431 /* Build the ith row in C by summing over nonzero columns in A, */ 432 /* the rows of B corresponding to nonzeros of A. */ 433 for (i=0;i<am;i++) { 434 anzi = ai[i+1] - ai[i]; 435 for (j=0;j<anzi;j++) { 436 brow = *aj++; 437 bnzi = bi[brow+1] - bi[brow]; 438 bjj = bj + bi[brow]; 439 baj = ba + bi[brow]; 440 for (k=0;k<bnzi;k++) { 441 temp[bjj[k]] += (*aa)*baj[k]; 442 } 443 flops += 2*bnzi; 444 aa++; 445 } 446 /* Store row back into C, and re-zero temp */ 447 cnzi = ci[i+1] - ci[i]; 448 for (j=0;j<cnzi;j++) { 449 ca[j] = temp[cj[j]]; 450 temp[cj[j]] = 0.0; 451 } 452 ca += cnzi; 453 cj += cnzi; 454 } 455 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 457 458 /* Free temp */ 459 ierr = PetscFree(temp);CHKERRQ(ierr); 460 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatMatMultTranspose" 466 /*@ 467 MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 468 469 Collective on Mat 470 471 Input Parameters: 472 + A - the left matrix 473 . B - the right matrix 474 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 475 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 476 477 Output Parameters: 478 . C - the product matrix 479 480 Notes: 481 C will be created and must be destroyed by the user with MatDestroy(). 482 483 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 484 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 485 486 Level: intermediate 487 488 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 489 @*/ 490 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 491 { 492 PetscErrorCode ierr; 493 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 494 PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 495 496 PetscFunctionBegin; 497 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 498 PetscValidType(A,1); 499 MatPreallocated(A); 500 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 501 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 502 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 503 PetscValidType(B,2); 504 MatPreallocated(B); 505 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 506 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 507 PetscValidPointer(C,3); 508 if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M); 509 510 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 511 512 fA = A->ops->matmulttranspose; 513 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 514 fB = B->ops->matmulttranspose; 515 if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 516 if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 517 518 ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 519 ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 520 ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 521 522 PetscFunctionReturn(0); 523 } 524 525 #undef __FUNCT__ 526 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 527 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 528 PetscErrorCode ierr; 529 530 PetscFunctionBegin; 531 if (scall == MAT_INITIAL_MATRIX){ 532 ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 533 } 534 ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 535 PetscFunctionReturn(0); 536 } 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 540 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 541 { 542 PetscErrorCode ierr; 543 Mat At; 544 int *ati,*atj; 545 546 PetscFunctionBegin; 547 /* create symbolic At */ 548 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 549 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 550 551 /* get symbolic C=At*B */ 552 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 553 554 /* clean up */ 555 ierr = MatDestroy(At);CHKERRQ(ierr); 556 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 557 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 563 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 564 { 565 PetscErrorCode ierr; 566 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 567 int am=A->m,anzi,*ai=b->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 568 int cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 569 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 570 571 PetscFunctionBegin; 572 /* clear old values in C */ 573 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 574 575 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 576 for (i=0;i<am;i++) { 577 bj = b->j + bi[i]; 578 ba = b->a + bi[i]; 579 bnzi = bi[i+1] - bi[i]; 580 anzi = ai[i+1] - ai[i]; 581 for (j=0; j<anzi; j++) { 582 nextb = 0; 583 crow = *aj++; 584 cjj = cj + ci[crow]; 585 caj = ca + ci[crow]; 586 /* perform sparse axpy operation. Note cjj includes bj. */ 587 for (k=0; nextb<bnzi; k++) { 588 /* bcol = *(bj+nextb); */ 589 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 590 caj[k] += (*aa)*(*(ba+nextb)); 591 nextb++; 592 } 593 } 594 flops += 2*bnzi; 595 aa++; 596 } 597 } 598 599 /* Assemble the final matrix and clean up */ 600 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 601 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 602 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605