1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <petsc/private/isimpl.h> 11 #include <../src/mat/impls/dense/seq/dense.h> 12 13 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 14 { 15 PetscFunctionBegin; 16 if (C->ops->matmultnumeric) PetscCall((*C->ops->matmultnumeric)(A,B,C)); 17 else PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C)); 18 PetscFunctionReturn(0); 19 } 20 21 /* Modified from MatCreateSeqAIJWithArrays() */ 22 PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat) 23 { 24 PetscInt ii; 25 Mat_SeqAIJ *aij; 26 PetscBool isseqaij, osingle, ofree_a, ofree_ij; 27 28 PetscFunctionBegin; 29 PetscCheck(m <= 0 || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 30 PetscCall(MatSetSizes(mat,m,n,m,n)); 31 32 if (!mtype) { 33 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij)); 34 if (!isseqaij) PetscCall(MatSetType(mat,MATSEQAIJ)); 35 } else { 36 PetscCall(MatSetType(mat,mtype)); 37 } 38 39 aij = (Mat_SeqAIJ*)(mat)->data; 40 osingle = aij->singlemalloc; 41 ofree_a = aij->free_a; 42 ofree_ij = aij->free_ij; 43 /* changes the free flags */ 44 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL)); 45 46 PetscCall(PetscFree(aij->ilen)); 47 PetscCall(PetscFree(aij->imax)); 48 PetscCall(PetscMalloc1(m,&aij->imax)); 49 PetscCall(PetscMalloc1(m,&aij->ilen)); 50 for (ii=0,aij->nonzerorowcnt=0,aij->rmax = 0; ii<m; ii++) { 51 const PetscInt rnz = i[ii+1] - i[ii]; 52 aij->nonzerorowcnt += !!rnz; 53 aij->rmax = PetscMax(aij->rmax,rnz); 54 aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii]; 55 } 56 aij->maxnz = i[m]; 57 aij->nz = i[m]; 58 59 if (osingle) { 60 PetscCall(PetscFree3(aij->a,aij->j,aij->i)); 61 } else { 62 if (ofree_a) PetscCall(PetscFree(aij->a)); 63 if (ofree_ij) PetscCall(PetscFree(aij->j)); 64 if (ofree_ij) PetscCall(PetscFree(aij->i)); 65 } 66 aij->i = i; 67 aij->j = j; 68 aij->a = a; 69 aij->nonew = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */ 70 /* default to not retain ownership */ 71 aij->singlemalloc = PETSC_FALSE; 72 aij->free_a = PETSC_FALSE; 73 aij->free_ij = PETSC_FALSE; 74 PetscCall(MatCheckCompressedRow(mat,aij->nonzerorowcnt,&aij->compressedrow,aij->i,m,0.6)); 75 PetscFunctionReturn(0); 76 } 77 78 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 79 { 80 Mat_Product *product = C->product; 81 MatProductAlgorithm alg; 82 PetscBool flg; 83 84 PetscFunctionBegin; 85 if (product) { 86 alg = product->alg; 87 } else { 88 alg = "sorted"; 89 } 90 /* sorted */ 91 PetscCall(PetscStrcmp(alg,"sorted",&flg)); 92 if (flg) { 93 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C)); 94 PetscFunctionReturn(0); 95 } 96 97 /* scalable */ 98 PetscCall(PetscStrcmp(alg,"scalable",&flg)); 99 if (flg) { 100 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C)); 101 PetscFunctionReturn(0); 102 } 103 104 /* scalable_fast */ 105 PetscCall(PetscStrcmp(alg,"scalable_fast",&flg)); 106 if (flg) { 107 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C)); 108 PetscFunctionReturn(0); 109 } 110 111 /* heap */ 112 PetscCall(PetscStrcmp(alg,"heap",&flg)); 113 if (flg) { 114 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C)); 115 PetscFunctionReturn(0); 116 } 117 118 /* btheap */ 119 PetscCall(PetscStrcmp(alg,"btheap",&flg)); 120 if (flg) { 121 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C)); 122 PetscFunctionReturn(0); 123 } 124 125 /* llcondensed */ 126 PetscCall(PetscStrcmp(alg,"llcondensed",&flg)); 127 if (flg) { 128 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C)); 129 PetscFunctionReturn(0); 130 } 131 132 /* rowmerge */ 133 PetscCall(PetscStrcmp(alg,"rowmerge",&flg)); 134 if (flg) { 135 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C)); 136 PetscFunctionReturn(0); 137 } 138 139 #if defined(PETSC_HAVE_HYPRE) 140 PetscCall(PetscStrcmp(alg,"hypre",&flg)); 141 if (flg) { 142 PetscCall(MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C)); 143 PetscFunctionReturn(0); 144 } 145 #endif 146 147 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 148 } 149 150 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C) 151 { 152 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 153 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 154 PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 155 PetscReal afill; 156 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 157 PetscTable ta; 158 PetscBT lnkbt; 159 PetscFreeSpaceList free_space=NULL,current_space=NULL; 160 161 PetscFunctionBegin; 162 /* Get ci and cj */ 163 /*---------------*/ 164 /* Allocate ci array, arrays for fill computation and */ 165 /* free space for accumulating nonzero column info */ 166 PetscCall(PetscMalloc1(am+2,&ci)); 167 ci[0] = 0; 168 169 /* create and initialize a linked list */ 170 PetscCall(PetscTableCreate(bn,bn,&ta)); 171 MatRowMergeMax_SeqAIJ(b,bm,ta); 172 PetscCall(PetscTableGetCount(ta,&Crmax)); 173 PetscCall(PetscTableDestroy(&ta)); 174 175 PetscCall(PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt)); 176 177 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 178 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 179 180 current_space = free_space; 181 182 /* Determine ci and cj */ 183 for (i=0; i<am; i++) { 184 anzi = ai[i+1] - ai[i]; 185 aj = a->j + ai[i]; 186 for (j=0; j<anzi; j++) { 187 brow = aj[j]; 188 bnzj = bi[brow+1] - bi[brow]; 189 bj = b->j + bi[brow]; 190 /* add non-zero cols of B into the sorted linked list lnk */ 191 PetscCall(PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt)); 192 } 193 /* add possible missing diagonal entry */ 194 if (C->force_diagonals) { 195 PetscCall(PetscLLCondensedAddSorted(1,&i,lnk,lnkbt)); 196 } 197 cnzi = lnk[0]; 198 199 /* If free space is not available, make more free space */ 200 /* Double the amount of total space in the list */ 201 if (current_space->local_remaining<cnzi) { 202 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 203 ndouble++; 204 } 205 206 /* Copy data into free space, then initialize lnk */ 207 PetscCall(PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt)); 208 209 current_space->array += cnzi; 210 current_space->local_used += cnzi; 211 current_space->local_remaining -= cnzi; 212 213 ci[i+1] = ci[i] + cnzi; 214 } 215 216 /* Column indices are in the list of free space */ 217 /* Allocate space for cj, initialize cj, and */ 218 /* destroy list of free space and other temporary array(s) */ 219 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 220 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 221 PetscCall(PetscLLCondensedDestroy(lnk,lnkbt)); 222 223 /* put together the new symbolic matrix */ 224 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 225 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 226 227 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 228 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 229 c = (Mat_SeqAIJ*)(C->data); 230 c->free_a = PETSC_FALSE; 231 c->free_ij = PETSC_TRUE; 232 c->nonew = 0; 233 234 /* fast, needs non-scalable O(bn) array 'abdense' */ 235 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 236 237 /* set MatInfo */ 238 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 239 if (afill < 1.0) afill = 1.0; 240 C->info.mallocs = ndouble; 241 C->info.fill_ratio_given = fill; 242 C->info.fill_ratio_needed = afill; 243 244 #if defined(PETSC_USE_INFO) 245 if (ci[am]) { 246 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 247 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 248 } else { 249 PetscCall(PetscInfo(C,"Empty matrix product\n")); 250 } 251 #endif 252 PetscFunctionReturn(0); 253 } 254 255 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C) 256 { 257 PetscLogDouble flops=0.0; 258 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 259 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 260 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 261 PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 262 PetscInt am =A->rmap->n,cm=C->rmap->n; 263 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 264 PetscScalar *ca,valtmp; 265 PetscScalar *ab_dense; 266 PetscContainer cab_dense; 267 const PetscScalar *aa,*ba,*baj; 268 269 PetscFunctionBegin; 270 PetscCall(MatSeqAIJGetArrayRead(A,&aa)); 271 PetscCall(MatSeqAIJGetArrayRead(B,&ba)); 272 if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 273 PetscCall(PetscMalloc1(ci[cm]+1,&ca)); 274 c->a = ca; 275 c->free_a = PETSC_TRUE; 276 } else ca = c->a; 277 278 /* TODO this should be done in the symbolic phase */ 279 /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers 280 that is hard to eradicate) */ 281 PetscCall(PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense)); 282 if (!cab_dense) { 283 PetscCall(PetscMalloc1(B->cmap->N,&ab_dense)); 284 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&cab_dense)); 285 PetscCall(PetscContainerSetPointer(cab_dense,ab_dense)); 286 PetscCall(PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault)); 287 PetscCall(PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense)); 288 PetscCall(PetscObjectDereference((PetscObject)cab_dense)); 289 } 290 PetscCall(PetscContainerGetPointer(cab_dense,(void**)&ab_dense)); 291 PetscCall(PetscArrayzero(ab_dense,B->cmap->N)); 292 293 /* clean old values in C */ 294 PetscCall(PetscArrayzero(ca,ci[cm])); 295 /* Traverse A row-wise. */ 296 /* Build the ith row in C by summing over nonzero columns in A, */ 297 /* the rows of B corresponding to nonzeros of A. */ 298 for (i=0; i<am; i++) { 299 anzi = ai[i+1] - ai[i]; 300 for (j=0; j<anzi; j++) { 301 brow = aj[j]; 302 bnzi = bi[brow+1] - bi[brow]; 303 bjj = bj + bi[brow]; 304 baj = ba + bi[brow]; 305 /* perform dense axpy */ 306 valtmp = aa[j]; 307 for (k=0; k<bnzi; k++) { 308 ab_dense[bjj[k]] += valtmp*baj[k]; 309 } 310 flops += 2*bnzi; 311 } 312 aj += anzi; aa += anzi; 313 314 cnzi = ci[i+1] - ci[i]; 315 for (k=0; k<cnzi; k++) { 316 ca[k] += ab_dense[cj[k]]; 317 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 318 } 319 flops += cnzi; 320 cj += cnzi; ca += cnzi; 321 } 322 #if defined(PETSC_HAVE_DEVICE) 323 if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 324 #endif 325 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 326 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 327 PetscCall(PetscLogFlops(flops)); 328 PetscCall(MatSeqAIJRestoreArrayRead(A,&aa)); 329 PetscCall(MatSeqAIJRestoreArrayRead(B,&ba)); 330 PetscFunctionReturn(0); 331 } 332 333 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 334 { 335 PetscLogDouble flops=0.0; 336 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 337 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 338 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 339 PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 340 PetscInt am = A->rmap->N,cm=C->rmap->N; 341 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 342 PetscScalar *ca=c->a,valtmp; 343 const PetscScalar *aa,*ba,*baj; 344 PetscInt nextb; 345 346 PetscFunctionBegin; 347 PetscCall(MatSeqAIJGetArrayRead(A,&aa)); 348 PetscCall(MatSeqAIJGetArrayRead(B,&ba)); 349 if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 350 PetscCall(PetscMalloc1(ci[cm]+1,&ca)); 351 c->a = ca; 352 c->free_a = PETSC_TRUE; 353 } 354 355 /* clean old values in C */ 356 PetscCall(PetscArrayzero(ca,ci[cm])); 357 /* Traverse A row-wise. */ 358 /* Build the ith row in C by summing over nonzero columns in A, */ 359 /* the rows of B corresponding to nonzeros of A. */ 360 for (i=0; i<am; i++) { 361 anzi = ai[i+1] - ai[i]; 362 cnzi = ci[i+1] - ci[i]; 363 for (j=0; j<anzi; j++) { 364 brow = aj[j]; 365 bnzi = bi[brow+1] - bi[brow]; 366 bjj = bj + bi[brow]; 367 baj = ba + bi[brow]; 368 /* perform sparse axpy */ 369 valtmp = aa[j]; 370 nextb = 0; 371 for (k=0; nextb<bnzi; k++) { 372 if (cj[k] == bjj[nextb]) { /* ccol == bcol */ 373 ca[k] += valtmp*baj[nextb++]; 374 } 375 } 376 flops += 2*bnzi; 377 } 378 aj += anzi; aa += anzi; 379 cj += cnzi; ca += cnzi; 380 } 381 #if defined(PETSC_HAVE_DEVICE) 382 if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 383 #endif 384 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 385 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 386 PetscCall(PetscLogFlops(flops)); 387 PetscCall(MatSeqAIJRestoreArrayRead(A,&aa)); 388 PetscCall(MatSeqAIJRestoreArrayRead(B,&ba)); 389 PetscFunctionReturn(0); 390 } 391 392 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C) 393 { 394 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 395 PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 396 PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 397 MatScalar *ca; 398 PetscReal afill; 399 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 400 PetscTable ta; 401 PetscFreeSpaceList free_space=NULL,current_space=NULL; 402 403 PetscFunctionBegin; 404 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 405 /*-----------------------------------------------------------------------------------------*/ 406 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 407 PetscCall(PetscMalloc1(am+2,&ci)); 408 ci[0] = 0; 409 410 /* create and initialize a linked list */ 411 PetscCall(PetscTableCreate(bn,bn,&ta)); 412 MatRowMergeMax_SeqAIJ(b,bm,ta); 413 PetscCall(PetscTableGetCount(ta,&Crmax)); 414 PetscCall(PetscTableDestroy(&ta)); 415 416 PetscCall(PetscLLCondensedCreate_fast(Crmax,&lnk)); 417 418 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 419 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 420 current_space = free_space; 421 422 /* Determine ci and cj */ 423 for (i=0; i<am; i++) { 424 anzi = ai[i+1] - ai[i]; 425 aj = a->j + ai[i]; 426 for (j=0; j<anzi; j++) { 427 brow = aj[j]; 428 bnzj = bi[brow+1] - bi[brow]; 429 bj = b->j + bi[brow]; 430 /* add non-zero cols of B into the sorted linked list lnk */ 431 PetscCall(PetscLLCondensedAddSorted_fast(bnzj,bj,lnk)); 432 } 433 /* add possible missing diagonal entry */ 434 if (C->force_diagonals) { 435 PetscCall(PetscLLCondensedAddSorted_fast(1,&i,lnk)); 436 } 437 cnzi = lnk[1]; 438 439 /* If free space is not available, make more free space */ 440 /* Double the amount of total space in the list */ 441 if (current_space->local_remaining<cnzi) { 442 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 443 ndouble++; 444 } 445 446 /* Copy data into free space, then initialize lnk */ 447 PetscCall(PetscLLCondensedClean_fast(cnzi,current_space->array,lnk)); 448 449 current_space->array += cnzi; 450 current_space->local_used += cnzi; 451 current_space->local_remaining -= cnzi; 452 453 ci[i+1] = ci[i] + cnzi; 454 } 455 456 /* Column indices are in the list of free space */ 457 /* Allocate space for cj, initialize cj, and */ 458 /* destroy list of free space and other temporary array(s) */ 459 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 460 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 461 PetscCall(PetscLLCondensedDestroy_fast(lnk)); 462 463 /* Allocate space for ca */ 464 PetscCall(PetscCalloc1(ci[am]+1,&ca)); 465 466 /* put together the new symbolic matrix */ 467 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C)); 468 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 469 470 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 471 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 472 c = (Mat_SeqAIJ*)(C->data); 473 c->free_a = PETSC_TRUE; 474 c->free_ij = PETSC_TRUE; 475 c->nonew = 0; 476 477 /* slower, less memory */ 478 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 479 480 /* set MatInfo */ 481 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 482 if (afill < 1.0) afill = 1.0; 483 C->info.mallocs = ndouble; 484 C->info.fill_ratio_given = fill; 485 C->info.fill_ratio_needed = afill; 486 487 #if defined(PETSC_USE_INFO) 488 if (ci[am]) { 489 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 490 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 491 } else { 492 PetscCall(PetscInfo(C,"Empty matrix product\n")); 493 } 494 #endif 495 PetscFunctionReturn(0); 496 } 497 498 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C) 499 { 500 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 501 PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 502 PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 503 MatScalar *ca; 504 PetscReal afill; 505 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 506 PetscTable ta; 507 PetscFreeSpaceList free_space=NULL,current_space=NULL; 508 509 PetscFunctionBegin; 510 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 511 /*---------------------------------------------------------------------------------------------*/ 512 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 513 PetscCall(PetscMalloc1(am+2,&ci)); 514 ci[0] = 0; 515 516 /* create and initialize a linked list */ 517 PetscCall(PetscTableCreate(bn,bn,&ta)); 518 MatRowMergeMax_SeqAIJ(b,bm,ta); 519 PetscCall(PetscTableGetCount(ta,&Crmax)); 520 PetscCall(PetscTableDestroy(&ta)); 521 PetscCall(PetscLLCondensedCreate_Scalable(Crmax,&lnk)); 522 523 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 524 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 525 current_space = free_space; 526 527 /* Determine ci and cj */ 528 for (i=0; i<am; i++) { 529 anzi = ai[i+1] - ai[i]; 530 aj = a->j + ai[i]; 531 for (j=0; j<anzi; j++) { 532 brow = aj[j]; 533 bnzj = bi[brow+1] - bi[brow]; 534 bj = b->j + bi[brow]; 535 /* add non-zero cols of B into the sorted linked list lnk */ 536 PetscCall(PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk)); 537 } 538 /* add possible missing diagonal entry */ 539 if (C->force_diagonals) { 540 PetscCall(PetscLLCondensedAddSorted_Scalable(1,&i,lnk)); 541 } 542 543 cnzi = lnk[0]; 544 545 /* If free space is not available, make more free space */ 546 /* Double the amount of total space in the list */ 547 if (current_space->local_remaining<cnzi) { 548 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 549 ndouble++; 550 } 551 552 /* Copy data into free space, then initialize lnk */ 553 PetscCall(PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk)); 554 555 current_space->array += cnzi; 556 current_space->local_used += cnzi; 557 current_space->local_remaining -= cnzi; 558 559 ci[i+1] = ci[i] + cnzi; 560 } 561 562 /* Column indices are in the list of free space */ 563 /* Allocate space for cj, initialize cj, and */ 564 /* destroy list of free space and other temporary array(s) */ 565 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 566 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 567 PetscCall(PetscLLCondensedDestroy_Scalable(lnk)); 568 569 /* Allocate space for ca */ 570 /*-----------------------*/ 571 PetscCall(PetscCalloc1(ci[am]+1,&ca)); 572 573 /* put together the new symbolic matrix */ 574 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C)); 575 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 576 577 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 578 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 579 c = (Mat_SeqAIJ*)(C->data); 580 c->free_a = PETSC_TRUE; 581 c->free_ij = PETSC_TRUE; 582 c->nonew = 0; 583 584 /* slower, less memory */ 585 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 586 587 /* set MatInfo */ 588 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 589 if (afill < 1.0) afill = 1.0; 590 C->info.mallocs = ndouble; 591 C->info.fill_ratio_given = fill; 592 C->info.fill_ratio_needed = afill; 593 594 #if defined(PETSC_USE_INFO) 595 if (ci[am]) { 596 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 597 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 598 } else { 599 PetscCall(PetscInfo(C,"Empty matrix product\n")); 600 } 601 #endif 602 PetscFunctionReturn(0); 603 } 604 605 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C) 606 { 607 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 608 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 609 PetscInt *ci,*cj,*bb; 610 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 611 PetscReal afill; 612 PetscInt i,j,col,ndouble = 0; 613 PetscFreeSpaceList free_space=NULL,current_space=NULL; 614 PetscHeap h; 615 616 PetscFunctionBegin; 617 /* Get ci and cj - by merging sorted rows using a heap */ 618 /*---------------------------------------------------------------------------------------------*/ 619 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 620 PetscCall(PetscMalloc1(am+2,&ci)); 621 ci[0] = 0; 622 623 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 624 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 625 current_space = free_space; 626 627 PetscCall(PetscHeapCreate(a->rmax,&h)); 628 PetscCall(PetscMalloc1(a->rmax,&bb)); 629 630 /* Determine ci and cj */ 631 for (i=0; i<am; i++) { 632 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 633 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 634 ci[i+1] = ci[i]; 635 /* Populate the min heap */ 636 for (j=0; j<anzi; j++) { 637 bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 638 if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 639 PetscCall(PetscHeapAdd(h,j,bj[bb[j]++])); 640 } 641 } 642 /* Pick off the min element, adding it to free space */ 643 PetscCall(PetscHeapPop(h,&j,&col)); 644 while (j >= 0) { 645 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 646 PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space)); 647 ndouble++; 648 } 649 *(current_space->array++) = col; 650 current_space->local_used++; 651 current_space->local_remaining--; 652 ci[i+1]++; 653 654 /* stash if anything else remains in this row of B */ 655 if (bb[j] < bi[acol[j]+1]) PetscCall(PetscHeapStash(h,j,bj[bb[j]++])); 656 while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 657 PetscInt j2,col2; 658 PetscCall(PetscHeapPeek(h,&j2,&col2)); 659 if (col2 != col) break; 660 PetscCall(PetscHeapPop(h,&j2,&col2)); 661 if (bb[j2] < bi[acol[j2]+1]) PetscCall(PetscHeapStash(h,j2,bj[bb[j2]++])); 662 } 663 /* Put any stashed elements back into the min heap */ 664 PetscCall(PetscHeapUnstash(h)); 665 PetscCall(PetscHeapPop(h,&j,&col)); 666 } 667 } 668 PetscCall(PetscFree(bb)); 669 PetscCall(PetscHeapDestroy(&h)); 670 671 /* Column indices are in the list of free space */ 672 /* Allocate space for cj, initialize cj, and */ 673 /* destroy list of free space and other temporary array(s) */ 674 PetscCall(PetscMalloc1(ci[am],&cj)); 675 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 676 677 /* put together the new symbolic matrix */ 678 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 679 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 680 681 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 682 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 683 c = (Mat_SeqAIJ*)(C->data); 684 c->free_a = PETSC_TRUE; 685 c->free_ij = PETSC_TRUE; 686 c->nonew = 0; 687 688 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 689 690 /* set MatInfo */ 691 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 692 if (afill < 1.0) afill = 1.0; 693 C->info.mallocs = ndouble; 694 C->info.fill_ratio_given = fill; 695 C->info.fill_ratio_needed = afill; 696 697 #if defined(PETSC_USE_INFO) 698 if (ci[am]) { 699 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 700 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 701 } else { 702 PetscCall(PetscInfo(C,"Empty matrix product\n")); 703 } 704 #endif 705 PetscFunctionReturn(0); 706 } 707 708 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C) 709 { 710 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 711 const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 712 PetscInt *ci,*cj,*bb; 713 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 714 PetscReal afill; 715 PetscInt i,j,col,ndouble = 0; 716 PetscFreeSpaceList free_space=NULL,current_space=NULL; 717 PetscHeap h; 718 PetscBT bt; 719 720 PetscFunctionBegin; 721 /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ 722 /*---------------------------------------------------------------------------------------------*/ 723 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 724 PetscCall(PetscMalloc1(am+2,&ci)); 725 ci[0] = 0; 726 727 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 728 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 729 730 current_space = free_space; 731 732 PetscCall(PetscHeapCreate(a->rmax,&h)); 733 PetscCall(PetscMalloc1(a->rmax,&bb)); 734 PetscCall(PetscBTCreate(bn,&bt)); 735 736 /* Determine ci and cj */ 737 for (i=0; i<am; i++) { 738 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 739 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 740 const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 741 ci[i+1] = ci[i]; 742 /* Populate the min heap */ 743 for (j=0; j<anzi; j++) { 744 PetscInt brow = acol[j]; 745 for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 746 PetscInt bcol = bj[bb[j]]; 747 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 748 PetscCall(PetscHeapAdd(h,j,bcol)); 749 bb[j]++; 750 break; 751 } 752 } 753 } 754 /* Pick off the min element, adding it to free space */ 755 PetscCall(PetscHeapPop(h,&j,&col)); 756 while (j >= 0) { 757 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 758 fptr = NULL; /* need PetscBTMemzero */ 759 PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space)); 760 ndouble++; 761 } 762 *(current_space->array++) = col; 763 current_space->local_used++; 764 current_space->local_remaining--; 765 ci[i+1]++; 766 767 /* stash if anything else remains in this row of B */ 768 for (; bb[j] < bi[acol[j]+1]; bb[j]++) { 769 PetscInt bcol = bj[bb[j]]; 770 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 771 PetscCall(PetscHeapAdd(h,j,bcol)); 772 bb[j]++; 773 break; 774 } 775 } 776 PetscCall(PetscHeapPop(h,&j,&col)); 777 } 778 if (fptr) { /* Clear the bits for this row */ 779 for (; fptr<current_space->array; fptr++) PetscCall(PetscBTClear(bt,*fptr)); 780 } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 781 PetscCall(PetscBTMemzero(bn,bt)); 782 } 783 } 784 PetscCall(PetscFree(bb)); 785 PetscCall(PetscHeapDestroy(&h)); 786 PetscCall(PetscBTDestroy(&bt)); 787 788 /* Column indices are in the list of free space */ 789 /* Allocate space for cj, initialize cj, and */ 790 /* destroy list of free space and other temporary array(s) */ 791 PetscCall(PetscMalloc1(ci[am],&cj)); 792 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 793 794 /* put together the new symbolic matrix */ 795 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 796 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 797 798 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 799 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 800 c = (Mat_SeqAIJ*)(C->data); 801 c->free_a = PETSC_TRUE; 802 c->free_ij = PETSC_TRUE; 803 c->nonew = 0; 804 805 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 806 807 /* set MatInfo */ 808 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 809 if (afill < 1.0) afill = 1.0; 810 C->info.mallocs = ndouble; 811 C->info.fill_ratio_given = fill; 812 C->info.fill_ratio_needed = afill; 813 814 #if defined(PETSC_USE_INFO) 815 if (ci[am]) { 816 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 817 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 818 } else { 819 PetscCall(PetscInfo(C,"Empty matrix product\n")); 820 } 821 #endif 822 PetscFunctionReturn(0); 823 } 824 825 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C) 826 { 827 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 828 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1; 829 PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9]; 830 PetscInt c_maxmem,a_maxrownnz=0,a_rownnz; 831 const PetscInt workcol[8]={0,1,2,3,4,5,6,7}; 832 const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 833 const PetscInt *brow_ptr[8],*brow_end[8]; 834 PetscInt window[8]; 835 PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows; 836 PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft; 837 PetscReal afill; 838 PetscInt *workj_L1,*workj_L2,*workj_L3; 839 PetscInt L1_nnz,L2_nnz; 840 841 /* Step 1: Get upper bound on memory required for allocation. 842 Because of the way virtual memory works, 843 only the memory pages that are actually needed will be physically allocated. */ 844 PetscFunctionBegin; 845 PetscCall(PetscMalloc1(am+1,&ci)); 846 for (i=0; i<am; i++) { 847 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 848 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 849 a_rownnz = 0; 850 for (k=0; k<anzi; ++k) { 851 a_rownnz += bi[acol[k]+1] - bi[acol[k]]; 852 if (a_rownnz > bn) { 853 a_rownnz = bn; 854 break; 855 } 856 } 857 a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz); 858 } 859 /* temporary work areas for merging rows */ 860 PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L1)); 861 PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L2)); 862 PetscCall(PetscMalloc1(a_maxrownnz,&workj_L3)); 863 864 /* This should be enough for almost all matrices. If not, memory is reallocated later. */ 865 c_maxmem = 8*(ai[am]+bi[bm]); 866 /* Step 2: Populate pattern for C */ 867 PetscCall(PetscMalloc1(c_maxmem,&cj)); 868 869 ci_nnz = 0; 870 ci[0] = 0; 871 worki_L1[0] = 0; 872 worki_L2[0] = 0; 873 for (i=0; i<am; i++) { 874 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 875 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 876 rowsleft = anzi; 877 inputcol_L1 = acol; 878 L2_nnz = 0; 879 L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ 880 worki_L2[1] = 0; 881 outputi_nnz = 0; 882 883 /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ 884 while (ci_nnz+a_maxrownnz > c_maxmem) { 885 c_maxmem *= 2; 886 ndouble++; 887 PetscCall(PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj)); 888 } 889 890 while (rowsleft) { 891 L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */ 892 L1_nrows = 0; 893 L1_nnz = 0; 894 inputcol = inputcol_L1; 895 inputi = bi; 896 inputj = bj; 897 898 /* The following macro is used to specialize for small rows in A. 899 This helps with compiler unrolling, improving performance substantially. 900 Input: inputj inputi inputcol bn 901 Output: outputj outputi_nnz */ 902 #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ 903 window_min = bn; \ 904 outputi_nnz = 0; \ 905 for (k=0; k<ANNZ; ++k) { \ 906 brow_ptr[k] = inputj + inputi[inputcol[k]]; \ 907 brow_end[k] = inputj + inputi[inputcol[k]+1]; \ 908 window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 909 window_min = PetscMin(window[k], window_min); \ 910 } \ 911 while (window_min < bn) { \ 912 outputj[outputi_nnz++] = window_min; \ 913 /* advance front and compute new minimum */ \ 914 old_window_min = window_min; \ 915 window_min = bn; \ 916 for (k=0; k<ANNZ; ++k) { \ 917 if (window[k] == old_window_min) { \ 918 brow_ptr[k]++; \ 919 window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 920 } \ 921 window_min = PetscMin(window[k], window_min); \ 922 } \ 923 } 924 925 /************** L E V E L 1 ***************/ 926 /* Merge up to 8 rows of B to L1 work array*/ 927 while (L1_rowsleft) { 928 outputi_nnz = 0; 929 if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ 930 else outputj = cj + ci_nnz; /* Merge directly to C */ 931 932 switch (L1_rowsleft) { 933 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 934 brow_end[0] = inputj + inputi[inputcol[0]+1]; 935 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 936 inputcol += L1_rowsleft; 937 rowsleft -= L1_rowsleft; 938 L1_rowsleft = 0; 939 break; 940 case 2: MatMatMultSymbolic_RowMergeMacro(2); 941 inputcol += L1_rowsleft; 942 rowsleft -= L1_rowsleft; 943 L1_rowsleft = 0; 944 break; 945 case 3: MatMatMultSymbolic_RowMergeMacro(3); 946 inputcol += L1_rowsleft; 947 rowsleft -= L1_rowsleft; 948 L1_rowsleft = 0; 949 break; 950 case 4: MatMatMultSymbolic_RowMergeMacro(4); 951 inputcol += L1_rowsleft; 952 rowsleft -= L1_rowsleft; 953 L1_rowsleft = 0; 954 break; 955 case 5: MatMatMultSymbolic_RowMergeMacro(5); 956 inputcol += L1_rowsleft; 957 rowsleft -= L1_rowsleft; 958 L1_rowsleft = 0; 959 break; 960 case 6: MatMatMultSymbolic_RowMergeMacro(6); 961 inputcol += L1_rowsleft; 962 rowsleft -= L1_rowsleft; 963 L1_rowsleft = 0; 964 break; 965 case 7: MatMatMultSymbolic_RowMergeMacro(7); 966 inputcol += L1_rowsleft; 967 rowsleft -= L1_rowsleft; 968 L1_rowsleft = 0; 969 break; 970 default: MatMatMultSymbolic_RowMergeMacro(8); 971 inputcol += 8; 972 rowsleft -= 8; 973 L1_rowsleft -= 8; 974 break; 975 } 976 inputcol_L1 = inputcol; 977 L1_nnz += outputi_nnz; 978 worki_L1[++L1_nrows] = L1_nnz; 979 } 980 981 /********************** L E V E L 2 ************************/ 982 /* Merge from L1 work array to either C or to L2 work array */ 983 if (anzi > 8) { 984 inputi = worki_L1; 985 inputj = workj_L1; 986 inputcol = workcol; 987 outputi_nnz = 0; 988 989 if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ 990 else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ 991 992 switch (L1_nrows) { 993 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 994 brow_end[0] = inputj + inputi[inputcol[0]+1]; 995 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 996 break; 997 case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 998 case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 999 case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1000 case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1001 case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1002 case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1003 case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1004 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!"); 1005 } 1006 L2_nnz += outputi_nnz; 1007 worki_L2[++L2_nrows] = L2_nnz; 1008 1009 /************************ L E V E L 3 **********************/ 1010 /* Merge from L2 work array to either C or to L2 work array */ 1011 if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { 1012 inputi = worki_L2; 1013 inputj = workj_L2; 1014 inputcol = workcol; 1015 outputi_nnz = 0; 1016 if (rowsleft) outputj = workj_L3; 1017 else outputj = cj + ci_nnz; 1018 switch (L2_nrows) { 1019 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 1020 brow_end[0] = inputj + inputi[inputcol[0]+1]; 1021 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1022 break; 1023 case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1024 case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1025 case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1026 case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1027 case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1028 case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1029 case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1030 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!"); 1031 } 1032 L2_nrows = 1; 1033 L2_nnz = outputi_nnz; 1034 worki_L2[1] = outputi_nnz; 1035 /* Copy to workj_L2 */ 1036 if (rowsleft) { 1037 for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k]; 1038 } 1039 } 1040 } 1041 } /* while (rowsleft) */ 1042 #undef MatMatMultSymbolic_RowMergeMacro 1043 1044 /* terminate current row */ 1045 ci_nnz += outputi_nnz; 1046 ci[i+1] = ci_nnz; 1047 } 1048 1049 /* Step 3: Create the new symbolic matrix */ 1050 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 1051 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 1052 1053 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1054 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 1055 c = (Mat_SeqAIJ*)(C->data); 1056 c->free_a = PETSC_TRUE; 1057 c->free_ij = PETSC_TRUE; 1058 c->nonew = 0; 1059 1060 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1061 1062 /* set MatInfo */ 1063 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 1064 if (afill < 1.0) afill = 1.0; 1065 C->info.mallocs = ndouble; 1066 C->info.fill_ratio_given = fill; 1067 C->info.fill_ratio_needed = afill; 1068 1069 #if defined(PETSC_USE_INFO) 1070 if (ci[am]) { 1071 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 1072 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 1073 } else { 1074 PetscCall(PetscInfo(C,"Empty matrix product\n")); 1075 } 1076 #endif 1077 1078 /* Step 4: Free temporary work areas */ 1079 PetscCall(PetscFree(workj_L1)); 1080 PetscCall(PetscFree(workj_L2)); 1081 PetscCall(PetscFree(workj_L3)); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 /* concatenate unique entries and then sort */ 1086 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C) 1087 { 1088 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 1089 const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 1090 PetscInt *ci,*cj,bcol; 1091 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 1092 PetscReal afill; 1093 PetscInt i,j,ndouble = 0; 1094 PetscSegBuffer seg,segrow; 1095 char *seen; 1096 1097 PetscFunctionBegin; 1098 PetscCall(PetscMalloc1(am+1,&ci)); 1099 ci[0] = 0; 1100 1101 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 1102 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg)); 1103 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),100,&segrow)); 1104 PetscCall(PetscCalloc1(bn,&seen)); 1105 1106 /* Determine ci and cj */ 1107 for (i=0; i<am; i++) { 1108 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 1109 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 1110 PetscInt packlen = 0,*PETSC_RESTRICT crow; 1111 1112 /* Pack segrow */ 1113 for (j=0; j<anzi; j++) { 1114 PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k; 1115 for (k=bjstart; k<bjend; k++) { 1116 bcol = bj[k]; 1117 if (!seen[bcol]) { /* new entry */ 1118 PetscInt *PETSC_RESTRICT slot; 1119 PetscCall(PetscSegBufferGetInts(segrow,1,&slot)); 1120 *slot = bcol; 1121 seen[bcol] = 1; 1122 packlen++; 1123 } 1124 } 1125 } 1126 1127 /* Check i-th diagonal entry */ 1128 if (C->force_diagonals && !seen[i]) { 1129 PetscInt *PETSC_RESTRICT slot; 1130 PetscCall(PetscSegBufferGetInts(segrow,1,&slot)); 1131 *slot = i; 1132 seen[i] = 1; 1133 packlen++; 1134 } 1135 1136 PetscCall(PetscSegBufferGetInts(seg,packlen,&crow)); 1137 PetscCall(PetscSegBufferExtractTo(segrow,crow)); 1138 PetscCall(PetscSortInt(packlen,crow)); 1139 ci[i+1] = ci[i] + packlen; 1140 for (j=0; j<packlen; j++) seen[crow[j]] = 0; 1141 } 1142 PetscCall(PetscSegBufferDestroy(&segrow)); 1143 PetscCall(PetscFree(seen)); 1144 1145 /* Column indices are in the segmented buffer */ 1146 PetscCall(PetscSegBufferExtractAlloc(seg,&cj)); 1147 PetscCall(PetscSegBufferDestroy(&seg)); 1148 1149 /* put together the new symbolic matrix */ 1150 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 1151 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 1152 1153 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1154 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 1155 c = (Mat_SeqAIJ*)(C->data); 1156 c->free_a = PETSC_TRUE; 1157 c->free_ij = PETSC_TRUE; 1158 c->nonew = 0; 1159 1160 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1161 1162 /* set MatInfo */ 1163 afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5; 1164 if (afill < 1.0) afill = 1.0; 1165 C->info.mallocs = ndouble; 1166 C->info.fill_ratio_given = fill; 1167 C->info.fill_ratio_needed = afill; 1168 1169 #if defined(PETSC_USE_INFO) 1170 if (ci[am]) { 1171 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 1172 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 1173 } else { 1174 PetscCall(PetscInfo(C,"Empty matrix product\n")); 1175 } 1176 #endif 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data) 1181 { 1182 Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data; 1183 1184 PetscFunctionBegin; 1185 PetscCall(MatTransposeColoringDestroy(&abt->matcoloring)); 1186 PetscCall(MatDestroy(&abt->Bt_den)); 1187 PetscCall(MatDestroy(&abt->ABt_den)); 1188 PetscCall(PetscFree(abt)); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1193 { 1194 Mat Bt; 1195 Mat_MatMatTransMult *abt; 1196 Mat_Product *product = C->product; 1197 char *alg; 1198 1199 PetscFunctionBegin; 1200 PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1201 PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 1202 1203 /* create symbolic Bt */ 1204 PetscCall(MatTransposeSymbolic(B,&Bt)); 1205 PetscCall(MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs))); 1206 PetscCall(MatSetType(Bt,((PetscObject)A)->type_name)); 1207 1208 /* get symbolic C=A*Bt */ 1209 PetscCall(PetscStrallocpy(product->alg,&alg)); 1210 PetscCall(MatProductSetAlgorithm(C,"sorted")); /* set algorithm for C = A*Bt */ 1211 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C)); 1212 PetscCall(MatProductSetAlgorithm(C,alg)); /* resume original algorithm for ABt product */ 1213 PetscCall(PetscFree(alg)); 1214 1215 /* create a supporting struct for reuse intermediate dense matrices with matcoloring */ 1216 PetscCall(PetscNew(&abt)); 1217 1218 product->data = abt; 1219 product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 1220 1221 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 1222 1223 abt->usecoloring = PETSC_FALSE; 1224 PetscCall(PetscStrcmp(product->alg,"color",&abt->usecoloring)); 1225 if (abt->usecoloring) { 1226 /* Create MatTransposeColoring from symbolic C=A*B^T */ 1227 MatTransposeColoring matcoloring; 1228 MatColoring coloring; 1229 ISColoring iscoloring; 1230 Mat Bt_dense,C_dense; 1231 1232 /* inode causes memory problem */ 1233 PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE)); 1234 1235 PetscCall(MatColoringCreate(C,&coloring)); 1236 PetscCall(MatColoringSetDistance(coloring,2)); 1237 PetscCall(MatColoringSetType(coloring,MATCOLORINGSL)); 1238 PetscCall(MatColoringSetFromOptions(coloring)); 1239 PetscCall(MatColoringApply(coloring,&iscoloring)); 1240 PetscCall(MatColoringDestroy(&coloring)); 1241 PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring)); 1242 1243 abt->matcoloring = matcoloring; 1244 1245 PetscCall(ISColoringDestroy(&iscoloring)); 1246 1247 /* Create Bt_dense and C_dense = A*Bt_dense */ 1248 PetscCall(MatCreate(PETSC_COMM_SELF,&Bt_dense)); 1249 PetscCall(MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors)); 1250 PetscCall(MatSetType(Bt_dense,MATSEQDENSE)); 1251 PetscCall(MatSeqDenseSetPreallocation(Bt_dense,NULL)); 1252 1253 Bt_dense->assembled = PETSC_TRUE; 1254 abt->Bt_den = Bt_dense; 1255 1256 PetscCall(MatCreate(PETSC_COMM_SELF,&C_dense)); 1257 PetscCall(MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors)); 1258 PetscCall(MatSetType(C_dense,MATSEQDENSE)); 1259 PetscCall(MatSeqDenseSetPreallocation(C_dense,NULL)); 1260 1261 Bt_dense->assembled = PETSC_TRUE; 1262 abt->ABt_den = C_dense; 1263 1264 #if defined(PETSC_USE_INFO) 1265 { 1266 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 1267 PetscCall(PetscInfo(C,"Use coloring of C=A*B^T; B^T: %" PetscInt_FMT " %" PetscInt_FMT ", Bt_dense: %" PetscInt_FMT ",%" PetscInt_FMT "; Cnz %" PetscInt_FMT " / (cm*ncolors %" PetscInt_FMT ") = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(double)(((PetscReal)(c->nz))/((PetscReal)(A->rmap->n*matcoloring->ncolors))))); 1268 } 1269 #endif 1270 } 1271 /* clean up */ 1272 PetscCall(MatDestroy(&Bt)); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1277 { 1278 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1279 PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 1280 PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 1281 PetscLogDouble flops=0.0; 1282 MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval; 1283 Mat_MatMatTransMult *abt; 1284 Mat_Product *product = C->product; 1285 1286 PetscFunctionBegin; 1287 PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1288 abt = (Mat_MatMatTransMult *)product->data; 1289 PetscCheck(abt,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1290 /* clear old values in C */ 1291 if (!c->a) { 1292 PetscCall(PetscCalloc1(ci[cm]+1,&ca)); 1293 c->a = ca; 1294 c->free_a = PETSC_TRUE; 1295 } else { 1296 ca = c->a; 1297 PetscCall(PetscArrayzero(ca,ci[cm]+1)); 1298 } 1299 1300 if (abt->usecoloring) { 1301 MatTransposeColoring matcoloring = abt->matcoloring; 1302 Mat Bt_dense,C_dense = abt->ABt_den; 1303 1304 /* Get Bt_dense by Apply MatTransposeColoring to B */ 1305 Bt_dense = abt->Bt_den; 1306 PetscCall(MatTransColoringApplySpToDen(matcoloring,B,Bt_dense)); 1307 1308 /* C_dense = A*Bt_dense */ 1309 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense)); 1310 1311 /* Recover C from C_dense */ 1312 PetscCall(MatTransColoringApplyDenToSp(matcoloring,C_dense,C)); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 for (i=0; i<cm; i++) { 1317 anzi = ai[i+1] - ai[i]; 1318 acol = aj + ai[i]; 1319 aval = aa + ai[i]; 1320 cnzi = ci[i+1] - ci[i]; 1321 ccol = cj + ci[i]; 1322 cval = ca + ci[i]; 1323 for (j=0; j<cnzi; j++) { 1324 brow = ccol[j]; 1325 bnzj = bi[brow+1] - bi[brow]; 1326 bcol = bj + bi[brow]; 1327 bval = ba + bi[brow]; 1328 1329 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 1330 nexta = 0; nextb = 0; 1331 while (nexta<anzi && nextb<bnzj) { 1332 while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; 1333 if (nexta == anzi) break; 1334 while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; 1335 if (nextb == bnzj) break; 1336 if (acol[nexta] == bcol[nextb]) { 1337 cval[j] += aval[nexta]*bval[nextb]; 1338 nexta++; nextb++; 1339 flops += 2; 1340 } 1341 } 1342 } 1343 } 1344 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1345 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1346 PetscCall(PetscLogFlops(flops)); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data) 1351 { 1352 Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data; 1353 1354 PetscFunctionBegin; 1355 PetscCall(MatDestroy(&atb->At)); 1356 if (atb->destroy) PetscCall((*atb->destroy)(atb->data)); 1357 PetscCall(PetscFree(atb)); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1362 { 1363 Mat At = NULL; 1364 Mat_Product *product = C->product; 1365 PetscBool flg,def,square; 1366 1367 PetscFunctionBegin; 1368 MatCheckProduct(C,4); 1369 square = (PetscBool)(A == B && A->symmetric == PETSC_BOOL3_TRUE); 1370 /* outerproduct */ 1371 PetscCall(PetscStrcmp(product->alg,"outerproduct",&flg)); 1372 if (flg) { 1373 /* create symbolic At */ 1374 if (!square) { 1375 PetscCall(MatTransposeSymbolic(A,&At)); 1376 PetscCall(MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs))); 1377 PetscCall(MatSetType(At,((PetscObject)A)->type_name)); 1378 } 1379 /* get symbolic C=At*B */ 1380 PetscCall(MatProductSetAlgorithm(C,"sorted")); 1381 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C)); 1382 1383 /* clean up */ 1384 if (!square) { 1385 PetscCall(MatDestroy(&At)); 1386 } 1387 1388 C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */ 1389 PetscCall(MatProductSetAlgorithm(C,"outerproduct")); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /* matmatmult */ 1394 PetscCall(PetscStrcmp(product->alg,"default",&def)); 1395 PetscCall(PetscStrcmp(product->alg,"at*b",&flg)); 1396 if (flg || def) { 1397 Mat_MatTransMatMult *atb; 1398 1399 PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 1400 PetscCall(PetscNew(&atb)); 1401 if (!square) { 1402 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 1403 } 1404 PetscCall(MatProductSetAlgorithm(C,"sorted")); 1405 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C)); 1406 PetscCall(MatProductSetAlgorithm(C,"at*b")); 1407 product->data = atb; 1408 product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 1409 atb->At = At; 1410 1411 C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */ 1412 PetscFunctionReturn(0); 1413 } 1414 1415 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1416 } 1417 1418 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1419 { 1420 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1421 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1422 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1423 PetscLogDouble flops=0.0; 1424 MatScalar *aa=a->a,*ba,*ca,*caj; 1425 1426 PetscFunctionBegin; 1427 if (!c->a) { 1428 PetscCall(PetscCalloc1(ci[cm]+1,&ca)); 1429 1430 c->a = ca; 1431 c->free_a = PETSC_TRUE; 1432 } else { 1433 ca = c->a; 1434 PetscCall(PetscArrayzero(ca,ci[cm])); 1435 } 1436 1437 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1438 for (i=0; i<am; i++) { 1439 bj = b->j + bi[i]; 1440 ba = b->a + bi[i]; 1441 bnzi = bi[i+1] - bi[i]; 1442 anzi = ai[i+1] - ai[i]; 1443 for (j=0; j<anzi; j++) { 1444 nextb = 0; 1445 crow = *aj++; 1446 cjj = cj + ci[crow]; 1447 caj = ca + ci[crow]; 1448 /* perform sparse axpy operation. Note cjj includes bj. */ 1449 for (k=0; nextb<bnzi; k++) { 1450 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 1451 caj[k] += (*aa)*(*(ba+nextb)); 1452 nextb++; 1453 } 1454 } 1455 flops += 2*bnzi; 1456 aa++; 1457 } 1458 } 1459 1460 /* Assemble the final matrix and clean up */ 1461 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1462 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1463 PetscCall(PetscLogFlops(flops)); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C) 1468 { 1469 PetscFunctionBegin; 1470 PetscCall(MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C)); 1471 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 1472 PetscFunctionReturn(0); 1473 } 1474 1475 PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add) 1476 { 1477 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1478 PetscScalar *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4; 1479 const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av; 1480 const PetscInt *aj; 1481 PetscInt cm=C->rmap->n,cn=B->cmap->n,bm,am=A->rmap->n; 1482 PetscInt clda; 1483 PetscInt am4,bm4,col,i,j,n; 1484 1485 PetscFunctionBegin; 1486 if (!cm || !cn) PetscFunctionReturn(0); 1487 PetscCall(MatSeqAIJGetArrayRead(A,&av)); 1488 if (add) { 1489 PetscCall(MatDenseGetArray(C,&c)); 1490 } else { 1491 PetscCall(MatDenseGetArrayWrite(C,&c)); 1492 } 1493 PetscCall(MatDenseGetArrayRead(B,&b)); 1494 PetscCall(MatDenseGetLDA(B,&bm)); 1495 PetscCall(MatDenseGetLDA(C,&clda)); 1496 am4 = 4*clda; 1497 bm4 = 4*bm; 1498 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1499 c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda; 1500 for (col=0; col<(cn/4)*4; col += 4) { /* over columns of C */ 1501 for (i=0; i<am; i++) { /* over rows of A in those columns */ 1502 r1 = r2 = r3 = r4 = 0.0; 1503 n = a->i[i+1] - a->i[i]; 1504 aj = a->j + a->i[i]; 1505 aa = av + a->i[i]; 1506 for (j=0; j<n; j++) { 1507 const PetscScalar aatmp = aa[j]; 1508 const PetscInt ajtmp = aj[j]; 1509 r1 += aatmp*b1[ajtmp]; 1510 r2 += aatmp*b2[ajtmp]; 1511 r3 += aatmp*b3[ajtmp]; 1512 r4 += aatmp*b4[ajtmp]; 1513 } 1514 if (add) { 1515 c1[i] += r1; 1516 c2[i] += r2; 1517 c3[i] += r3; 1518 c4[i] += r4; 1519 } else { 1520 c1[i] = r1; 1521 c2[i] = r2; 1522 c3[i] = r3; 1523 c4[i] = r4; 1524 } 1525 } 1526 b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; 1527 c1 += am4; c2 += am4; c3 += am4; c4 += am4; 1528 } 1529 /* process remaining columns */ 1530 if (col != cn) { 1531 PetscInt rc = cn-col; 1532 1533 if (rc == 1) { 1534 for (i=0; i<am; i++) { 1535 r1 = 0.0; 1536 n = a->i[i+1] - a->i[i]; 1537 aj = a->j + a->i[i]; 1538 aa = av + a->i[i]; 1539 for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]]; 1540 if (add) c1[i] += r1; 1541 else c1[i] = r1; 1542 } 1543 } else if (rc == 2) { 1544 for (i=0; i<am; i++) { 1545 r1 = r2 = 0.0; 1546 n = a->i[i+1] - a->i[i]; 1547 aj = a->j + a->i[i]; 1548 aa = av + a->i[i]; 1549 for (j=0; j<n; j++) { 1550 const PetscScalar aatmp = aa[j]; 1551 const PetscInt ajtmp = aj[j]; 1552 r1 += aatmp*b1[ajtmp]; 1553 r2 += aatmp*b2[ajtmp]; 1554 } 1555 if (add) { 1556 c1[i] += r1; 1557 c2[i] += r2; 1558 } else { 1559 c1[i] = r1; 1560 c2[i] = r2; 1561 } 1562 } 1563 } else { 1564 for (i=0; i<am; i++) { 1565 r1 = r2 = r3 = 0.0; 1566 n = a->i[i+1] - a->i[i]; 1567 aj = a->j + a->i[i]; 1568 aa = av + a->i[i]; 1569 for (j=0; j<n; j++) { 1570 const PetscScalar aatmp = aa[j]; 1571 const PetscInt ajtmp = aj[j]; 1572 r1 += aatmp*b1[ajtmp]; 1573 r2 += aatmp*b2[ajtmp]; 1574 r3 += aatmp*b3[ajtmp]; 1575 } 1576 if (add) { 1577 c1[i] += r1; 1578 c2[i] += r2; 1579 c3[i] += r3; 1580 } else { 1581 c1[i] = r1; 1582 c2[i] = r2; 1583 c3[i] = r3; 1584 } 1585 } 1586 } 1587 } 1588 PetscCall(PetscLogFlops(cn*(2.0*a->nz))); 1589 if (add) { 1590 PetscCall(MatDenseRestoreArray(C,&c)); 1591 } else { 1592 PetscCall(MatDenseRestoreArrayWrite(C,&c)); 1593 } 1594 PetscCall(MatDenseRestoreArrayRead(B,&b)); 1595 PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1600 { 1601 PetscFunctionBegin; 1602 PetscCheck(B->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,B->rmap->n); 1603 PetscCheck(A->rmap->n == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,C->rmap->n,A->rmap->n); 1604 PetscCheck(B->cmap->n == C->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %" PetscInt_FMT " not equal columns in C %" PetscInt_FMT,B->cmap->n,C->cmap->n); 1605 1606 PetscCall(MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE)); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 /* ------------------------------------------------------- */ 1611 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C) 1612 { 1613 PetscFunctionBegin; 1614 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense; 1615 C->ops->productsymbolic = MatProductSymbolic_AB; 1616 PetscFunctionReturn(0); 1617 } 1618 1619 PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat); 1620 1621 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C) 1622 { 1623 PetscFunctionBegin; 1624 C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 1625 C->ops->productsymbolic = MatProductSymbolic_AtB; 1626 PetscFunctionReturn(0); 1627 } 1628 1629 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C) 1630 { 1631 PetscFunctionBegin; 1632 C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 1633 C->ops->productsymbolic = MatProductSymbolic_ABt; 1634 PetscFunctionReturn(0); 1635 } 1636 1637 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C) 1638 { 1639 Mat_Product *product = C->product; 1640 1641 PetscFunctionBegin; 1642 switch (product->type) { 1643 case MATPRODUCT_AB: 1644 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C)); 1645 break; 1646 case MATPRODUCT_AtB: 1647 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C)); 1648 break; 1649 case MATPRODUCT_ABt: 1650 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C)); 1651 break; 1652 default: 1653 break; 1654 } 1655 PetscFunctionReturn(0); 1656 } 1657 /* ------------------------------------------------------- */ 1658 static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C) 1659 { 1660 Mat_Product *product = C->product; 1661 Mat A = product->A; 1662 PetscBool baij; 1663 1664 PetscFunctionBegin; 1665 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij)); 1666 if (!baij) { /* A is seqsbaij */ 1667 PetscBool sbaij; 1668 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij)); 1669 PetscCheck(sbaij,PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format"); 1670 1671 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense; 1672 } else { /* A is seqbaij */ 1673 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense; 1674 } 1675 1676 C->ops->productsymbolic = MatProductSymbolic_AB; 1677 PetscFunctionReturn(0); 1678 } 1679 1680 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C) 1681 { 1682 Mat_Product *product = C->product; 1683 1684 PetscFunctionBegin; 1685 MatCheckProduct(C,1); 1686 PetscCheck(product->A,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A"); 1687 if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric == PETSC_BOOL3_TRUE)) PetscCall(MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C)); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 /* ------------------------------------------------------- */ 1692 static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C) 1693 { 1694 PetscFunctionBegin; 1695 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ; 1696 C->ops->productsymbolic = MatProductSymbolic_AB; 1697 PetscFunctionReturn(0); 1698 } 1699 1700 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C) 1701 { 1702 Mat_Product *product = C->product; 1703 1704 PetscFunctionBegin; 1705 if (product->type == MATPRODUCT_AB) { 1706 PetscCall(MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C)); 1707 } 1708 PetscFunctionReturn(0); 1709 } 1710 /* ------------------------------------------------------- */ 1711 1712 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1713 { 1714 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1715 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1716 PetscInt *bi = b->i,*bj=b->j; 1717 PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1718 MatScalar *btval,*btval_den,*ba=b->a; 1719 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1720 1721 PetscFunctionBegin; 1722 btval_den=btdense->v; 1723 PetscCall(PetscArrayzero(btval_den,m*n)); 1724 for (k=0; k<ncolors; k++) { 1725 ncolumns = coloring->ncolumns[k]; 1726 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1727 col = *(columns + colorforcol[k] + l); 1728 btcol = bj + bi[col]; 1729 btval = ba + bi[col]; 1730 anz = bi[col+1] - bi[col]; 1731 for (j=0; j<anz; j++) { 1732 brow = btcol[j]; 1733 btval_den[brow] = btval[j]; 1734 } 1735 } 1736 btval_den += m; 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1742 { 1743 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1744 const PetscScalar *ca_den,*ca_den_ptr; 1745 PetscScalar *ca=csp->a; 1746 PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors; 1747 PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp; 1748 PetscInt nrows,*row,*idx; 1749 PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow; 1750 1751 PetscFunctionBegin; 1752 PetscCall(MatDenseGetArrayRead(Cden,&ca_den)); 1753 1754 if (brows > 0) { 1755 PetscInt *lstart,row_end,row_start; 1756 lstart = matcoloring->lstart; 1757 PetscCall(PetscArrayzero(lstart,ncolors)); 1758 1759 row_end = brows; 1760 if (row_end > m) row_end = m; 1761 for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */ 1762 ca_den_ptr = ca_den; 1763 for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */ 1764 nrows = matcoloring->nrows[k]; 1765 row = rows + colorforrow[k]; 1766 idx = den2sp + colorforrow[k]; 1767 for (l=lstart[k]; l<nrows; l++) { 1768 if (row[l] >= row_end) { 1769 lstart[k] = l; 1770 break; 1771 } else { 1772 ca[idx[l]] = ca_den_ptr[row[l]]; 1773 } 1774 } 1775 ca_den_ptr += m; 1776 } 1777 row_end += brows; 1778 if (row_end > m) row_end = m; 1779 } 1780 } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ 1781 ca_den_ptr = ca_den; 1782 for (k=0; k<ncolors; k++) { 1783 nrows = matcoloring->nrows[k]; 1784 row = rows + colorforrow[k]; 1785 idx = den2sp + colorforrow[k]; 1786 for (l=0; l<nrows; l++) { 1787 ca[idx[l]] = ca_den_ptr[row[l]]; 1788 } 1789 ca_den_ptr += m; 1790 } 1791 } 1792 1793 PetscCall(MatDenseRestoreArrayRead(Cden,&ca_den)); 1794 #if defined(PETSC_USE_INFO) 1795 if (matcoloring->brows > 0) { 1796 PetscCall(PetscInfo(Csp,"Loop over %" PetscInt_FMT " row blocks for den2sp\n",brows)); 1797 } else { 1798 PetscCall(PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n")); 1799 } 1800 #endif 1801 PetscFunctionReturn(0); 1802 } 1803 1804 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1805 { 1806 PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm; 1807 const PetscInt *is,*ci,*cj,*row_idx; 1808 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1809 IS *isa; 1810 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1811 PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i; 1812 PetscInt *colorforcol,*columns,*columns_i,brows; 1813 PetscBool flg; 1814 1815 PetscFunctionBegin; 1816 PetscCall(ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa)); 1817 1818 /* bs >1 is not being tested yet! */ 1819 Nbs = mat->cmap->N/bs; 1820 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1821 c->N = Nbs; 1822 c->m = c->M; 1823 c->rstart = 0; 1824 c->brows = 100; 1825 1826 c->ncolors = nis; 1827 PetscCall(PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow)); 1828 PetscCall(PetscMalloc1(csp->nz+1,&rows)); 1829 PetscCall(PetscMalloc1(csp->nz+1,&den2sp)); 1830 1831 brows = c->brows; 1832 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg)); 1833 if (flg) c->brows = brows; 1834 if (brows > 0) { 1835 PetscCall(PetscMalloc1(nis+1,&c->lstart)); 1836 } 1837 1838 colorforrow[0] = 0; 1839 rows_i = rows; 1840 den2sp_i = den2sp; 1841 1842 PetscCall(PetscMalloc1(nis+1,&colorforcol)); 1843 PetscCall(PetscMalloc1(Nbs+1,&columns)); 1844 1845 colorforcol[0] = 0; 1846 columns_i = columns; 1847 1848 /* get column-wise storage of mat */ 1849 PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL)); 1850 1851 cm = c->m; 1852 PetscCall(PetscMalloc1(cm+1,&rowhit)); 1853 PetscCall(PetscMalloc1(cm+1,&idxhit)); 1854 for (i=0; i<nis; i++) { /* loop over color */ 1855 PetscCall(ISGetLocalSize(isa[i],&n)); 1856 PetscCall(ISGetIndices(isa[i],&is)); 1857 1858 c->ncolumns[i] = n; 1859 if (n) PetscCall(PetscArraycpy(columns_i,is,n)); 1860 colorforcol[i+1] = colorforcol[i] + n; 1861 columns_i += n; 1862 1863 /* fast, crude version requires O(N*N) work */ 1864 PetscCall(PetscArrayzero(rowhit,cm)); 1865 1866 for (j=0; j<n; j++) { /* loop over columns*/ 1867 col = is[j]; 1868 row_idx = cj + ci[col]; 1869 m = ci[col+1] - ci[col]; 1870 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 1871 idxhit[*row_idx] = spidx[ci[col] + k]; 1872 rowhit[*row_idx++] = col + 1; 1873 } 1874 } 1875 /* count the number of hits */ 1876 nrows = 0; 1877 for (j=0; j<cm; j++) { 1878 if (rowhit[j]) nrows++; 1879 } 1880 c->nrows[i] = nrows; 1881 colorforrow[i+1] = colorforrow[i] + nrows; 1882 1883 nrows = 0; 1884 for (j=0; j<cm; j++) { /* loop over rows */ 1885 if (rowhit[j]) { 1886 rows_i[nrows] = j; 1887 den2sp_i[nrows] = idxhit[j]; 1888 nrows++; 1889 } 1890 } 1891 den2sp_i += nrows; 1892 1893 PetscCall(ISRestoreIndices(isa[i],&is)); 1894 rows_i += nrows; 1895 } 1896 PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL)); 1897 PetscCall(PetscFree(rowhit)); 1898 PetscCall(ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa)); 1899 PetscCheck(csp->nz == colorforrow[nis],PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %" PetscInt_FMT " != colorforrow[nis] %" PetscInt_FMT,csp->nz,colorforrow[nis]); 1900 1901 c->colorforrow = colorforrow; 1902 c->rows = rows; 1903 c->den2sp = den2sp; 1904 c->colorforcol = colorforcol; 1905 c->columns = columns; 1906 1907 PetscCall(PetscFree(idxhit)); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 /* --------------------------------------------------------------- */ 1912 static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C) 1913 { 1914 Mat_Product *product = C->product; 1915 Mat A=product->A,B=product->B; 1916 1917 PetscFunctionBegin; 1918 if (C->ops->mattransposemultnumeric) { 1919 /* Alg: "outerproduct" */ 1920 PetscCall((*C->ops->mattransposemultnumeric)(A,B,C)); 1921 } else { 1922 /* Alg: "matmatmult" -- C = At*B */ 1923 Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data; 1924 1925 PetscCheck(atb,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1926 if (atb->At) { 1927 /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(); 1928 user may have called MatProductReplaceMats() to get this A=product->A */ 1929 PetscCall(MatTransposeSetPrecursor(A,atb->At)); 1930 PetscCall(MatTranspose(A,MAT_REUSE_MATRIX,&atb->At)); 1931 } 1932 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(atb->At ? atb->At : A,B,C)); 1933 } 1934 PetscFunctionReturn(0); 1935 } 1936 1937 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C) 1938 { 1939 Mat_Product *product = C->product; 1940 Mat A=product->A,B=product->B; 1941 PetscReal fill=product->fill; 1942 1943 PetscFunctionBegin; 1944 PetscCall(MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C)); 1945 1946 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ; 1947 PetscFunctionReturn(0); 1948 } 1949 1950 /* --------------------------------------------------------------- */ 1951 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C) 1952 { 1953 Mat_Product *product = C->product; 1954 PetscInt alg = 0; /* default algorithm */ 1955 PetscBool flg = PETSC_FALSE; 1956 #if !defined(PETSC_HAVE_HYPRE) 1957 const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 1958 PetscInt nalg = 7; 1959 #else 1960 const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"}; 1961 PetscInt nalg = 8; 1962 #endif 1963 1964 PetscFunctionBegin; 1965 /* Set default algorithm */ 1966 PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 1967 if (flg) { 1968 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 1969 } 1970 1971 /* Get runtime option */ 1972 if (product->api_user) { 1973 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 1974 PetscCall(PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg)); 1975 PetscOptionsEnd(); 1976 } else { 1977 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 1978 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg)); 1979 PetscOptionsEnd(); 1980 } 1981 if (flg) { 1982 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 1983 } 1984 1985 C->ops->productsymbolic = MatProductSymbolic_AB; 1986 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 1987 PetscFunctionReturn(0); 1988 } 1989 1990 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C) 1991 { 1992 Mat_Product *product = C->product; 1993 PetscInt alg = 0; /* default algorithm */ 1994 PetscBool flg = PETSC_FALSE; 1995 const char *algTypes[3] = {"default","at*b","outerproduct"}; 1996 PetscInt nalg = 3; 1997 1998 PetscFunctionBegin; 1999 /* Get runtime option */ 2000 if (product->api_user) { 2001 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat"); 2002 PetscCall(PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2003 PetscOptionsEnd(); 2004 } else { 2005 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat"); 2006 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg)); 2007 PetscOptionsEnd(); 2008 } 2009 if (flg) { 2010 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2011 } 2012 2013 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ; 2014 PetscFunctionReturn(0); 2015 } 2016 2017 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C) 2018 { 2019 Mat_Product *product = C->product; 2020 PetscInt alg = 0; /* default algorithm */ 2021 PetscBool flg = PETSC_FALSE; 2022 const char *algTypes[2] = {"default","color"}; 2023 PetscInt nalg = 2; 2024 2025 PetscFunctionBegin; 2026 /* Set default algorithm */ 2027 PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 2028 if (!flg) { 2029 alg = 1; 2030 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2031 } 2032 2033 /* Get runtime option */ 2034 if (product->api_user) { 2035 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2036 PetscCall(PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2037 PetscOptionsEnd(); 2038 } else { 2039 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2040 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2041 PetscOptionsEnd(); 2042 } 2043 if (flg) { 2044 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2045 } 2046 2047 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 2048 C->ops->productsymbolic = MatProductSymbolic_ABt; 2049 PetscFunctionReturn(0); 2050 } 2051 2052 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C) 2053 { 2054 Mat_Product *product = C->product; 2055 PetscBool flg = PETSC_FALSE; 2056 PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */ 2057 #if !defined(PETSC_HAVE_HYPRE) 2058 const char *algTypes[2] = {"scalable","rap"}; 2059 PetscInt nalg = 2; 2060 #else 2061 const char *algTypes[3] = {"scalable","rap","hypre"}; 2062 PetscInt nalg = 3; 2063 #endif 2064 2065 PetscFunctionBegin; 2066 /* Set default algorithm */ 2067 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2068 if (flg) { 2069 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2070 } 2071 2072 /* Get runtime option */ 2073 if (product->api_user) { 2074 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 2075 PetscCall(PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg)); 2076 PetscOptionsEnd(); 2077 } else { 2078 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 2079 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg)); 2080 PetscOptionsEnd(); 2081 } 2082 if (flg) { 2083 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2084 } 2085 2086 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ; 2087 PetscFunctionReturn(0); 2088 } 2089 2090 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C) 2091 { 2092 Mat_Product *product = C->product; 2093 PetscBool flg = PETSC_FALSE; 2094 PetscInt alg = 0; /* default algorithm */ 2095 const char *algTypes[3] = {"r*a*rt","r*art","coloring_rart"}; 2096 PetscInt nalg = 3; 2097 2098 PetscFunctionBegin; 2099 /* Set default algorithm */ 2100 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2101 if (flg) { 2102 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2103 } 2104 2105 /* Get runtime option */ 2106 if (product->api_user) { 2107 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat"); 2108 PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg)); 2109 PetscOptionsEnd(); 2110 } else { 2111 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat"); 2112 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg)); 2113 PetscOptionsEnd(); 2114 } 2115 if (flg) { 2116 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2117 } 2118 2119 C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ; 2120 PetscFunctionReturn(0); 2121 } 2122 2123 /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */ 2124 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C) 2125 { 2126 Mat_Product *product = C->product; 2127 PetscInt alg = 0; /* default algorithm */ 2128 PetscBool flg = PETSC_FALSE; 2129 const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 2130 PetscInt nalg = 7; 2131 2132 PetscFunctionBegin; 2133 /* Set default algorithm */ 2134 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2135 if (flg) { 2136 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2137 } 2138 2139 /* Get runtime option */ 2140 if (product->api_user) { 2141 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat"); 2142 PetscCall(PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2143 PetscOptionsEnd(); 2144 } else { 2145 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat"); 2146 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg)); 2147 PetscOptionsEnd(); 2148 } 2149 if (flg) { 2150 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2151 } 2152 2153 C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ; 2154 C->ops->productsymbolic = MatProductSymbolic_ABC; 2155 PetscFunctionReturn(0); 2156 } 2157 2158 PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C) 2159 { 2160 Mat_Product *product = C->product; 2161 2162 PetscFunctionBegin; 2163 switch (product->type) { 2164 case MATPRODUCT_AB: 2165 PetscCall(MatProductSetFromOptions_SeqAIJ_AB(C)); 2166 break; 2167 case MATPRODUCT_AtB: 2168 PetscCall(MatProductSetFromOptions_SeqAIJ_AtB(C)); 2169 break; 2170 case MATPRODUCT_ABt: 2171 PetscCall(MatProductSetFromOptions_SeqAIJ_ABt(C)); 2172 break; 2173 case MATPRODUCT_PtAP: 2174 PetscCall(MatProductSetFromOptions_SeqAIJ_PtAP(C)); 2175 break; 2176 case MATPRODUCT_RARt: 2177 PetscCall(MatProductSetFromOptions_SeqAIJ_RARt(C)); 2178 break; 2179 case MATPRODUCT_ABC: 2180 PetscCall(MatProductSetFromOptions_SeqAIJ_ABC(C)); 2181 break; 2182 default: 2183 break; 2184 } 2185 PetscFunctionReturn(0); 2186 } 2187