1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 12 EXTERN_C_BEGIN 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 15 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 16 { 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 if (scall == MAT_INITIAL_MATRIX){ 21 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 22 } 23 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 24 PetscFunctionReturn(0); 25 } 26 EXTERN_C_END 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 30 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 31 { 32 PetscErrorCode ierr; 33 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 34 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 35 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 36 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 37 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 38 MatScalar *ca; 39 PetscBT lnkbt; 40 PetscReal afill; 41 42 PetscFunctionBegin; 43 /* Allocate ci array, arrays for fill computation and */ 44 /* free space for accumulating nonzero column info */ 45 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 46 ci[0] = 0; 47 48 /* create and initialize a linked list */ 49 nlnk = bn+1; 50 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 51 52 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 53 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 54 current_space = free_space; 55 56 /* Determine symbolic info for each row of the product: */ 57 for (i=0;i<am;i++) { 58 anzi = ai[i+1] - ai[i]; 59 cnzi = 0; 60 j = anzi; 61 aj = a->j + ai[i]; 62 while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 63 j--; 64 brow = *(aj + j); 65 bnzj = bi[brow+1] - bi[brow]; 66 bjj = bj + bi[brow]; 67 /* add non-zero cols of B into the sorted linked list lnk */ 68 ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 69 cnzi += nlnk; 70 } 71 72 /* If free space is not available, make more free space */ 73 /* Double the amount of total space in the list */ 74 if (current_space->local_remaining<cnzi) { 75 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 76 nspacedouble++; 77 } 78 79 /* Copy data into free space, then initialize lnk */ 80 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 81 current_space->array += cnzi; 82 current_space->local_used += cnzi; 83 current_space->local_remaining -= cnzi; 84 85 ci[i+1] = ci[i] + cnzi; 86 } 87 88 /* Column indices are in the list of free space */ 89 /* Allocate space for cj, initialize cj, and */ 90 /* destroy list of free space and other temporary array(s) */ 91 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 92 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 93 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 94 95 /* Allocate space for ca */ 96 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 97 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 98 99 /* put together the new symbolic matrix */ 100 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 101 102 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 103 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 104 c = (Mat_SeqAIJ *)((*C)->data); 105 c->free_a = PETSC_TRUE; 106 c->free_ij = PETSC_TRUE; 107 c->nonew = 0; 108 109 /* set MatInfo */ 110 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 111 if (afill < 1.0) afill = 1.0; 112 c->maxnz = ci[am]; 113 c->nz = ci[am]; 114 (*C)->info.mallocs = nspacedouble; 115 (*C)->info.fill_ratio_given = fill; 116 (*C)->info.fill_ratio_needed = afill; 117 118 #if defined(PETSC_USE_INFO) 119 if (ci[am]) { 120 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 121 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 122 } else { 123 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 124 } 125 #endif 126 PetscFunctionReturn(0); 127 } 128 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 132 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 133 { 134 PetscErrorCode ierr; 135 PetscLogDouble flops=0.0; 136 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 137 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 138 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 139 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 140 PetscInt am=A->rmap->N,cm=C->rmap->N; 141 PetscInt i,j,k,anzi,bnzi,cnzi,brow,nextb; 142 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a; 143 144 PetscFunctionBegin; 145 /* clean old values in C */ 146 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 147 /* Traverse A row-wise. */ 148 /* Build the ith row in C by summing over nonzero columns in A, */ 149 /* the rows of B corresponding to nonzeros of A. */ 150 for (i=0;i<am;i++) { 151 anzi = ai[i+1] - ai[i]; 152 for (j=0;j<anzi;j++) { 153 brow = *aj++; 154 bnzi = bi[brow+1] - bi[brow]; 155 bjj = bj + bi[brow]; 156 baj = ba + bi[brow]; 157 nextb = 0; 158 for (k=0; nextb<bnzi; k++) { 159 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 160 ca[k] += (*aa)*baj[nextb++]; 161 } 162 } 163 flops += 2*bnzi; 164 aa++; 165 } 166 cnzi = ci[i+1] - ci[i]; 167 ca += cnzi; 168 cj += cnzi; 169 } 170 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 173 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 #undef __FUNCT__ 178 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 179 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 180 { 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 if (scall == MAT_INITIAL_MATRIX){ 185 ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 186 } 187 ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 193 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 194 { 195 PetscErrorCode ierr; 196 Mat Bt; 197 PetscInt *bti,*btj; 198 199 PetscFunctionBegin; 200 /* create symbolic Bt */ 201 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 202 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 203 204 /* get symbolic C=A*Bt */ 205 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 206 207 /* clean up */ 208 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 209 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 210 211 #if defined(INEFFICIENT_ALGORITHM) 212 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 213 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 214 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 215 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 216 PetscInt am=A->rmap->N,bm=B->rmap->N; 217 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 218 MatScalar *ca; 219 PetscBT lnkbt; 220 PetscReal afill; 221 222 /* Allocate row pointer array ci */ 223 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 224 ci[0] = 0; 225 226 /* Create and initialize a linked list for C columns */ 227 nlnk = bm+1; 228 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 229 230 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 231 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 232 current_space = free_space; 233 234 /* Determine symbolic info for each row of the product A*B^T: */ 235 for (i=0; i<am; i++) { 236 anzi = ai[i+1] - ai[i]; 237 cnzi = 0; 238 acol = aj + ai[i]; 239 for (j=0; j<bm; j++){ 240 bnzj = bi[j+1] - bi[j]; 241 bcol= bj + bi[j]; 242 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 243 ka = 0; kb = 0; 244 while (ka < anzi && kb < bnzj){ 245 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 246 if (ka == anzi) break; 247 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 248 if (kb == bnzj) break; 249 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 250 index[0] = j; 251 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 252 cnzi++; 253 break; 254 } 255 } 256 } 257 258 /* If free space is not available, make more free space */ 259 /* Double the amount of total space in the list */ 260 if (current_space->local_remaining<cnzi) { 261 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 262 nspacedouble++; 263 } 264 265 /* Copy data into free space, then initialize lnk */ 266 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 267 current_space->array += cnzi; 268 current_space->local_used += cnzi; 269 current_space->local_remaining -= cnzi; 270 271 ci[i+1] = ci[i] + cnzi; 272 } 273 274 275 /* Column indices are in the list of free space. 276 Allocate array cj, copy column indices to cj, and destroy list of free space */ 277 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 278 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 279 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 280 281 /* Allocate space for ca */ 282 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 283 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 284 285 /* put together the new symbolic matrix */ 286 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 287 288 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 289 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 290 c = (Mat_SeqAIJ *)((*C)->data); 291 c->free_a = PETSC_TRUE; 292 c->free_ij = PETSC_TRUE; 293 c->nonew = 0; 294 295 /* set MatInfo */ 296 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 297 if (afill < 1.0) afill = 1.0; 298 c->maxnz = ci[am]; 299 c->nz = ci[am]; 300 (*C)->info.mallocs = nspacedouble; 301 (*C)->info.fill_ratio_given = fill; 302 (*C)->info.fill_ratio_needed = afill; 303 304 #if defined(PETSC_USE_INFO) 305 if (ci[am]) { 306 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 307 ierr = PetscInfo1((*C),"Use MatMatMultTranspose(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 308 } else { 309 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 310 } 311 #endif 312 #endif 313 PetscFunctionReturn(0); 314 } 315 316 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 319 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 320 { 321 PetscErrorCode ierr; 322 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 323 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 324 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 325 PetscLogDouble flops=0.0; 326 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca=c->a,*cval; 327 #if defined(USE_ARRAY) 328 MatScalar *spdot; 329 #endif 330 PetscBool flg; 331 332 PetscFunctionBegin; 333 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmulttrans_color",&flg,PETSC_NULL);CHKERRQ(ierr); 334 335 if (flg){ 336 printf("Create MatMultTransposeColoring ...\n"); 337 /* Create MatMultTransposeColoring from symbolic C=A*B^T */ 338 MatMultTransposeColoring matfdcoloring = 0; 339 ISColoring iscoloring; 340 ierr = MatGetColoring(C,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr); 341 ierr = MatMultTransposeColoringCreate(C,iscoloring,&matfdcoloring);CHKERRQ(ierr); 342 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 343 344 /* Create Bt_dense */ 345 Mat Bt_dense; 346 PetscInt m,n; 347 ierr = MatCreate(PETSC_COMM_WORLD,&Bt_dense);CHKERRQ(ierr); 348 ierr = MatSetSizes(Bt_dense,A->cmap->n,matfdcoloring->ncolors,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 349 ierr = MatSetType(Bt_dense,MATDENSE);CHKERRQ(ierr); 350 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 351 ierr = MatAssemblyBegin(Bt_dense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 352 ierr = MatAssemblyEnd(Bt_dense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 354 printf("Bt_dense: %d,%d\n",m,n); 355 356 /* Get Bt_dense by Apply MatMultTransposeColoring to B */ 357 ierr = MatMultTransposeColoringApply(B,Bt_dense,matfdcoloring);CHKERRQ(ierr); 358 359 /* C_dense = A*Bt_dense */ 360 Mat C_dense; 361 ierr = MatMatMult(A,Bt_dense,MAT_INITIAL_MATRIX,2.0,&C_dense); CHKERRQ(ierr); 362 ierr = MatSetOptionsPrefix(C_dense,"C_dense_");CHKERRQ(ierr); 363 364 365 366 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not done yet"); 367 PetscFunctionReturn(0); 368 } 369 370 #if defined(USE_ARRAY) 371 /* allocate an array for implementing sparse inner-product */ 372 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 373 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 374 #endif 375 376 /* clear old values in C */ 377 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 378 379 for (i=0; i<cm; i++) { 380 anzi = ai[i+1] - ai[i]; 381 acol = aj + ai[i]; 382 aval = aa + ai[i]; 383 cnzi = ci[i+1] - ci[i]; 384 ccol = cj + ci[i]; 385 cval = ca + ci[i]; 386 for (j=0; j<cnzi; j++){ 387 brow = ccol[j]; 388 bnzj = bi[brow+1] - bi[brow]; 389 bcol = bj + bi[brow]; 390 bval = ba + bi[brow]; 391 392 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 393 #if defined(USE_ARRAY) 394 /* put ba to spdot array */ 395 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 396 /* c(i,j)=A[i,:]*B[j,:]^T */ 397 for (nexta=0; nexta<anzi; nexta++){ 398 cval[j] += spdot[acol[nexta]]*aval[nexta]; 399 } 400 /* zero spdot array */ 401 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 402 #else 403 nexta = 0; nextb = 0; 404 while (nexta<anzi && nextb<bnzj){ 405 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 406 if (nexta == anzi) break; 407 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 408 if (nextb == bnzj) break; 409 if (acol[nexta] == bcol[nextb]){ 410 cval[j] += aval[nexta]*bval[nextb]; 411 nexta++; nextb++; 412 flops += 2; 413 } 414 } 415 #endif 416 } 417 } 418 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 419 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 420 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 421 #if defined(USE_ARRAY) 422 ierr = PetscFree(spdot); 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 429 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 430 PetscErrorCode ierr; 431 432 PetscFunctionBegin; 433 if (scall == MAT_INITIAL_MATRIX){ 434 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 435 } 436 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 442 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 443 { 444 PetscErrorCode ierr; 445 Mat At; 446 PetscInt *ati,*atj; 447 448 PetscFunctionBegin; 449 /* create symbolic At */ 450 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 451 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 452 453 /* get symbolic C=At*B */ 454 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 455 456 /* clean up */ 457 ierr = MatDestroy(&At);CHKERRQ(ierr); 458 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 464 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 465 { 466 PetscErrorCode ierr; 467 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 468 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 469 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 470 PetscLogDouble flops=0.0; 471 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 472 473 PetscFunctionBegin; 474 /* clear old values in C */ 475 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 476 477 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 478 for (i=0;i<am;i++) { 479 bj = b->j + bi[i]; 480 ba = b->a + bi[i]; 481 bnzi = bi[i+1] - bi[i]; 482 anzi = ai[i+1] - ai[i]; 483 for (j=0; j<anzi; j++) { 484 nextb = 0; 485 crow = *aj++; 486 cjj = cj + ci[crow]; 487 caj = ca + ci[crow]; 488 /* perform sparse axpy operation. Note cjj includes bj. */ 489 for (k=0; nextb<bnzi; k++) { 490 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 491 caj[k] += (*aa)*(*(ba+nextb)); 492 nextb++; 493 } 494 } 495 flops += 2*bnzi; 496 aa++; 497 } 498 } 499 500 /* Assemble the final matrix and clean up */ 501 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 502 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 503 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 504 PetscFunctionReturn(0); 505 } 506 507 EXTERN_C_BEGIN 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 510 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 511 { 512 PetscErrorCode ierr; 513 514 PetscFunctionBegin; 515 if (scall == MAT_INITIAL_MATRIX){ 516 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 517 } 518 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 EXTERN_C_END 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 525 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 526 { 527 PetscErrorCode ierr; 528 529 PetscFunctionBegin; 530 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 536 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 537 { 538 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 539 PetscErrorCode ierr; 540 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 541 MatScalar *aa; 542 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 543 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 544 545 PetscFunctionBegin; 546 if (!cm || !cn) PetscFunctionReturn(0); 547 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 548 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 549 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 550 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 551 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 552 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 553 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 554 colam = col*am; 555 for (i=0; i<am; i++) { /* over rows of C in those columns */ 556 r1 = r2 = r3 = r4 = 0.0; 557 n = a->i[i+1] - a->i[i]; 558 aj = a->j + a->i[i]; 559 aa = a->a + a->i[i]; 560 for (j=0; j<n; j++) { 561 r1 += (*aa)*b1[*aj]; 562 r2 += (*aa)*b2[*aj]; 563 r3 += (*aa)*b3[*aj]; 564 r4 += (*aa++)*b4[*aj++]; 565 } 566 c[colam + i] = r1; 567 c[colam + am + i] = r2; 568 c[colam + am2 + i] = r3; 569 c[colam + am3 + i] = r4; 570 } 571 b1 += bm4; 572 b2 += bm4; 573 b3 += bm4; 574 b4 += bm4; 575 } 576 for (;col<cn; col++){ /* over extra columns of C */ 577 for (i=0; i<am; i++) { /* over rows of C in those columns */ 578 r1 = 0.0; 579 n = a->i[i+1] - a->i[i]; 580 aj = a->j + a->i[i]; 581 aa = a->a + a->i[i]; 582 583 for (j=0; j<n; j++) { 584 r1 += (*aa++)*b1[*aj++]; 585 } 586 c[col*am + i] = r1; 587 } 588 b1 += bm; 589 } 590 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 591 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 592 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 593 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 594 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 /* 599 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 600 */ 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 603 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 604 { 605 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 606 PetscErrorCode ierr; 607 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 608 MatScalar *aa; 609 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 610 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 611 612 PetscFunctionBegin; 613 if (!cm || !cn) PetscFunctionReturn(0); 614 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 615 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 616 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 617 618 if (a->compressedrow.use){ /* use compressed row format */ 619 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 620 colam = col*am; 621 arm = a->compressedrow.nrows; 622 ii = a->compressedrow.i; 623 ridx = a->compressedrow.rindex; 624 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 625 r1 = r2 = r3 = r4 = 0.0; 626 n = ii[i+1] - ii[i]; 627 aj = a->j + ii[i]; 628 aa = a->a + ii[i]; 629 for (j=0; j<n; j++) { 630 r1 += (*aa)*b1[*aj]; 631 r2 += (*aa)*b2[*aj]; 632 r3 += (*aa)*b3[*aj]; 633 r4 += (*aa++)*b4[*aj++]; 634 } 635 c[colam + ridx[i]] += r1; 636 c[colam + am + ridx[i]] += r2; 637 c[colam + am2 + ridx[i]] += r3; 638 c[colam + am3 + ridx[i]] += r4; 639 } 640 b1 += bm4; 641 b2 += bm4; 642 b3 += bm4; 643 b4 += bm4; 644 } 645 for (;col<cn; col++){ /* over extra columns of C */ 646 colam = col*am; 647 arm = a->compressedrow.nrows; 648 ii = a->compressedrow.i; 649 ridx = a->compressedrow.rindex; 650 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 651 r1 = 0.0; 652 n = ii[i+1] - ii[i]; 653 aj = a->j + ii[i]; 654 aa = a->a + ii[i]; 655 656 for (j=0; j<n; j++) { 657 r1 += (*aa++)*b1[*aj++]; 658 } 659 c[col*am + ridx[i]] += r1; 660 } 661 b1 += bm; 662 } 663 } else { 664 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 665 colam = col*am; 666 for (i=0; i<am; i++) { /* over rows of C in those columns */ 667 r1 = r2 = r3 = r4 = 0.0; 668 n = a->i[i+1] - a->i[i]; 669 aj = a->j + a->i[i]; 670 aa = a->a + a->i[i]; 671 for (j=0; j<n; j++) { 672 r1 += (*aa)*b1[*aj]; 673 r2 += (*aa)*b2[*aj]; 674 r3 += (*aa)*b3[*aj]; 675 r4 += (*aa++)*b4[*aj++]; 676 } 677 c[colam + i] += r1; 678 c[colam + am + i] += r2; 679 c[colam + am2 + i] += r3; 680 c[colam + am3 + i] += r4; 681 } 682 b1 += bm4; 683 b2 += bm4; 684 b3 += bm4; 685 b4 += bm4; 686 } 687 for (;col<cn; col++){ /* over extra columns of C */ 688 for (i=0; i<am; i++) { /* over rows of C in those columns */ 689 r1 = 0.0; 690 n = a->i[i+1] - a->i[i]; 691 aj = a->j + a->i[i]; 692 aa = a->a + a->i[i]; 693 694 for (j=0; j<n; j++) { 695 r1 += (*aa++)*b1[*aj++]; 696 } 697 c[col*am + i] += r1; 698 } 699 b1 += bm; 700 } 701 } 702 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 703 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 704 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatMultTransposeColoringApply_SeqAIJ" 710 PetscErrorCode MatMultTransposeColoringApply_SeqAIJ(Mat B,Mat Btdense,MatMultTransposeColoring coloring) 711 { 712 PetscErrorCode ierr; 713 Mat_SeqAIJ *a = (Mat_SeqAIJ*)B->data; 714 Mat_SeqDense *atdense = (Mat_SeqDense*)Btdense->data; 715 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*atcol,brow,bcol; 716 MatScalar *atval,*bval; 717 718 PetscFunctionBegin; 719 ierr = PetscMemzero(atdense->v,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 720 for (k=0; k<coloring->ncolors; k++) { 721 for (l=0; l<coloring->ncolumns[k]; l++) { /* insert a row of B to a column of Btdense */ 722 col = coloring->columns[k][l]; /* =row of B */ 723 anz = a->i[col+1] - a->i[col]; 724 for (j=0; j<anz; j++){ 725 atcol = a->j + a->i[col]; 726 atval = a->a + a->i[col]; 727 bval = atdense->v; 728 brow = atcol[j]; 729 bcol = k; 730 bval[bcol*m+brow] = atval[j]; 731 } 732 } 733 } 734 PetscFunctionReturn(0); 735 } 736 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatMultTransposeColoringCreate_SeqAIJ" 739 PetscErrorCode MatMultTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatMultTransposeColoring c) 740 { 741 PetscErrorCode ierr; 742 PetscInt i,n,nrows,N,j,k,m,*rows,*ci,*cj,ncols,col; 743 const PetscInt *is; 744 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,bs = 1; 745 IS *isa; 746 PetscBool done; 747 PetscBool flg1,flg2; 748 749 PetscFunctionBegin; 750 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 751 752 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 753 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 754 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 755 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 756 if (flg1 || flg2) { 757 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 758 } 759 760 N = mat->cmap->N/bs; 761 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 762 c->N = mat->cmap->N/bs; 763 c->m = mat->rmap->N/bs; 764 c->rstart = 0; 765 766 c->ncolors = nis; 767 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 768 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 769 ierr = PetscMalloc(c->m*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 770 ierr = PetscMalloc(c->m*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 771 ierr = PetscMalloc(c->m*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 772 773 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 774 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 775 776 ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 777 ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 778 779 for (i=0; i<nis; i++) { 780 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 781 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 782 c->ncolumns[i] = n; 783 if (n) { 784 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 785 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 786 } else { 787 c->columns[i] = 0; 788 } 789 790 /* fast, crude version requires O(N*N) work */ 791 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 792 /* loop over columns*/ 793 for (j=0; j<n; j++) { 794 col = is[j]; 795 rows = cj + ci[col]; 796 m = ci[col+1] - ci[col]; 797 /* loop over columns marking them in rowhit */ 798 for (k=0; k<m; k++) { 799 rowhit[*rows++] = col + 1; 800 } 801 } 802 /* count the number of hits */ 803 nrows = 0; 804 for (j=0; j<c->m; j++) { 805 if (rowhit[j]) nrows++; 806 } 807 c->nrows[i] = nrows; 808 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 809 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 810 nrows = 0; 811 for (j=0; j<c->m; j++) { 812 if (rowhit[j]) { 813 c->rows[i][nrows] = j; 814 c->columnsforrow[i][nrows] = rowhit[j] - 1; 815 nrows++; 816 } 817 } 818 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 819 } 820 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 821 822 ierr = PetscFree(rowhit);CHKERRQ(ierr); 823 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 824 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } 827