1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <petsc/private/isimpl.h> 11 #include <../src/mat/impls/dense/seq/dense.h> 12 13 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 14 { 15 PetscFunctionBegin; 16 if (C->ops->matmultnumeric) { 17 PetscCheck(C->ops->matmultnumeric != MatMatMultNumeric_SeqAIJ_SeqAIJ,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Recursive call"); 18 PetscCall((*C->ops->matmultnumeric)(A,B,C)); 19 } else { 20 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C)); 21 } 22 PetscFunctionReturn(0); 23 } 24 25 /* Modified from MatCreateSeqAIJWithArrays() */ 26 PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat) 27 { 28 PetscInt ii; 29 Mat_SeqAIJ *aij; 30 PetscBool isseqaij, osingle, ofree_a, ofree_ij; 31 32 PetscFunctionBegin; 33 PetscCheck(m <= 0 || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 34 PetscCall(MatSetSizes(mat,m,n,m,n)); 35 36 if (!mtype) { 37 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij)); 38 if (!isseqaij) PetscCall(MatSetType(mat,MATSEQAIJ)); 39 } else { 40 PetscCall(MatSetType(mat,mtype)); 41 } 42 43 aij = (Mat_SeqAIJ*)(mat)->data; 44 osingle = aij->singlemalloc; 45 ofree_a = aij->free_a; 46 ofree_ij = aij->free_ij; 47 /* changes the free flags */ 48 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL)); 49 50 PetscCall(PetscFree(aij->ilen)); 51 PetscCall(PetscFree(aij->imax)); 52 PetscCall(PetscMalloc1(m,&aij->imax)); 53 PetscCall(PetscMalloc1(m,&aij->ilen)); 54 for (ii=0,aij->nonzerorowcnt=0,aij->rmax = 0; ii<m; ii++) { 55 const PetscInt rnz = i[ii+1] - i[ii]; 56 aij->nonzerorowcnt += !!rnz; 57 aij->rmax = PetscMax(aij->rmax,rnz); 58 aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii]; 59 } 60 aij->maxnz = i[m]; 61 aij->nz = i[m]; 62 63 if (osingle) { 64 PetscCall(PetscFree3(aij->a,aij->j,aij->i)); 65 } else { 66 if (ofree_a) PetscCall(PetscFree(aij->a)); 67 if (ofree_ij) PetscCall(PetscFree(aij->j)); 68 if (ofree_ij) PetscCall(PetscFree(aij->i)); 69 } 70 aij->i = i; 71 aij->j = j; 72 aij->a = a; 73 aij->nonew = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */ 74 /* default to not retain ownership */ 75 aij->singlemalloc = PETSC_FALSE; 76 aij->free_a = PETSC_FALSE; 77 aij->free_ij = PETSC_FALSE; 78 PetscCall(MatCheckCompressedRow(mat,aij->nonzerorowcnt,&aij->compressedrow,aij->i,m,0.6)); 79 PetscFunctionReturn(0); 80 } 81 82 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 83 { 84 Mat_Product *product = C->product; 85 MatProductAlgorithm alg; 86 PetscBool flg; 87 88 PetscFunctionBegin; 89 if (product) { 90 alg = product->alg; 91 } else { 92 alg = "sorted"; 93 } 94 /* sorted */ 95 PetscCall(PetscStrcmp(alg,"sorted",&flg)); 96 if (flg) { 97 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C)); 98 PetscFunctionReturn(0); 99 } 100 101 /* scalable */ 102 PetscCall(PetscStrcmp(alg,"scalable",&flg)); 103 if (flg) { 104 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C)); 105 PetscFunctionReturn(0); 106 } 107 108 /* scalable_fast */ 109 PetscCall(PetscStrcmp(alg,"scalable_fast",&flg)); 110 if (flg) { 111 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C)); 112 PetscFunctionReturn(0); 113 } 114 115 /* heap */ 116 PetscCall(PetscStrcmp(alg,"heap",&flg)); 117 if (flg) { 118 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C)); 119 PetscFunctionReturn(0); 120 } 121 122 /* btheap */ 123 PetscCall(PetscStrcmp(alg,"btheap",&flg)); 124 if (flg) { 125 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C)); 126 PetscFunctionReturn(0); 127 } 128 129 /* llcondensed */ 130 PetscCall(PetscStrcmp(alg,"llcondensed",&flg)); 131 if (flg) { 132 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C)); 133 PetscFunctionReturn(0); 134 } 135 136 /* rowmerge */ 137 PetscCall(PetscStrcmp(alg,"rowmerge",&flg)); 138 if (flg) { 139 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C)); 140 PetscFunctionReturn(0); 141 } 142 143 #if defined(PETSC_HAVE_HYPRE) 144 PetscCall(PetscStrcmp(alg,"hypre",&flg)); 145 if (flg) { 146 PetscCall(MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C)); 147 PetscFunctionReturn(0); 148 } 149 #endif 150 151 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 152 } 153 154 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C) 155 { 156 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 157 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 158 PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 159 PetscReal afill; 160 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 161 PetscTable ta; 162 PetscBT lnkbt; 163 PetscFreeSpaceList free_space=NULL,current_space=NULL; 164 165 PetscFunctionBegin; 166 /* Get ci and cj */ 167 /*---------------*/ 168 /* Allocate ci array, arrays for fill computation and */ 169 /* free space for accumulating nonzero column info */ 170 PetscCall(PetscMalloc1(am+2,&ci)); 171 ci[0] = 0; 172 173 /* create and initialize a linked list */ 174 PetscCall(PetscTableCreate(bn,bn,&ta)); 175 MatRowMergeMax_SeqAIJ(b,bm,ta); 176 PetscCall(PetscTableGetCount(ta,&Crmax)); 177 PetscCall(PetscTableDestroy(&ta)); 178 179 PetscCall(PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt)); 180 181 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 182 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 183 184 current_space = free_space; 185 186 /* Determine ci and cj */ 187 for (i=0; i<am; i++) { 188 anzi = ai[i+1] - ai[i]; 189 aj = a->j + ai[i]; 190 for (j=0; j<anzi; j++) { 191 brow = aj[j]; 192 bnzj = bi[brow+1] - bi[brow]; 193 bj = b->j + bi[brow]; 194 /* add non-zero cols of B into the sorted linked list lnk */ 195 PetscCall(PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt)); 196 } 197 /* add possible missing diagonal entry */ 198 if (C->force_diagonals) { 199 PetscCall(PetscLLCondensedAddSorted(1,&i,lnk,lnkbt)); 200 } 201 cnzi = lnk[0]; 202 203 /* If free space is not available, make more free space */ 204 /* Double the amount of total space in the list */ 205 if (current_space->local_remaining<cnzi) { 206 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 207 ndouble++; 208 } 209 210 /* Copy data into free space, then initialize lnk */ 211 PetscCall(PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt)); 212 213 current_space->array += cnzi; 214 current_space->local_used += cnzi; 215 current_space->local_remaining -= cnzi; 216 217 ci[i+1] = ci[i] + cnzi; 218 } 219 220 /* Column indices are in the list of free space */ 221 /* Allocate space for cj, initialize cj, and */ 222 /* destroy list of free space and other temporary array(s) */ 223 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 224 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 225 PetscCall(PetscLLCondensedDestroy(lnk,lnkbt)); 226 227 /* put together the new symbolic matrix */ 228 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 229 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 230 231 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 232 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 233 c = (Mat_SeqAIJ*)(C->data); 234 c->free_a = PETSC_FALSE; 235 c->free_ij = PETSC_TRUE; 236 c->nonew = 0; 237 238 /* fast, needs non-scalable O(bn) array 'abdense' */ 239 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 240 241 /* set MatInfo */ 242 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 243 if (afill < 1.0) afill = 1.0; 244 C->info.mallocs = ndouble; 245 C->info.fill_ratio_given = fill; 246 C->info.fill_ratio_needed = afill; 247 248 #if defined(PETSC_USE_INFO) 249 if (ci[am]) { 250 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 251 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 252 } else { 253 PetscCall(PetscInfo(C,"Empty matrix product\n")); 254 } 255 #endif 256 PetscFunctionReturn(0); 257 } 258 259 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C) 260 { 261 PetscLogDouble flops=0.0; 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 263 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 264 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 265 PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 266 PetscInt am =A->rmap->n,cm=C->rmap->n; 267 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 268 PetscScalar *ca,valtmp; 269 PetscScalar *ab_dense; 270 PetscContainer cab_dense; 271 const PetscScalar *aa,*ba,*baj; 272 273 PetscFunctionBegin; 274 PetscCall(MatSeqAIJGetArrayRead(A,&aa)); 275 PetscCall(MatSeqAIJGetArrayRead(B,&ba)); 276 if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 277 PetscCall(PetscMalloc1(ci[cm]+1,&ca)); 278 c->a = ca; 279 c->free_a = PETSC_TRUE; 280 } else ca = c->a; 281 282 /* TODO this should be done in the symbolic phase */ 283 /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers 284 that is hard to eradicate) */ 285 PetscCall(PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense)); 286 if (!cab_dense) { 287 PetscCall(PetscMalloc1(B->cmap->N,&ab_dense)); 288 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&cab_dense)); 289 PetscCall(PetscContainerSetPointer(cab_dense,ab_dense)); 290 PetscCall(PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault)); 291 PetscCall(PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense)); 292 PetscCall(PetscObjectDereference((PetscObject)cab_dense)); 293 } 294 PetscCall(PetscContainerGetPointer(cab_dense,(void**)&ab_dense)); 295 PetscCall(PetscArrayzero(ab_dense,B->cmap->N)); 296 297 /* clean old values in C */ 298 PetscCall(PetscArrayzero(ca,ci[cm])); 299 /* Traverse A row-wise. */ 300 /* Build the ith row in C by summing over nonzero columns in A, */ 301 /* the rows of B corresponding to nonzeros of A. */ 302 for (i=0; i<am; i++) { 303 anzi = ai[i+1] - ai[i]; 304 for (j=0; j<anzi; j++) { 305 brow = aj[j]; 306 bnzi = bi[brow+1] - bi[brow]; 307 bjj = bj + bi[brow]; 308 baj = ba + bi[brow]; 309 /* perform dense axpy */ 310 valtmp = aa[j]; 311 for (k=0; k<bnzi; k++) { 312 ab_dense[bjj[k]] += valtmp*baj[k]; 313 } 314 flops += 2*bnzi; 315 } 316 aj += anzi; aa += anzi; 317 318 cnzi = ci[i+1] - ci[i]; 319 for (k=0; k<cnzi; k++) { 320 ca[k] += ab_dense[cj[k]]; 321 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 322 } 323 flops += cnzi; 324 cj += cnzi; ca += cnzi; 325 } 326 #if defined(PETSC_HAVE_DEVICE) 327 if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 328 #endif 329 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 330 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 331 PetscCall(PetscLogFlops(flops)); 332 PetscCall(MatSeqAIJRestoreArrayRead(A,&aa)); 333 PetscCall(MatSeqAIJRestoreArrayRead(B,&ba)); 334 PetscFunctionReturn(0); 335 } 336 337 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 338 { 339 PetscLogDouble flops=0.0; 340 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 341 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 342 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 343 PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 344 PetscInt am = A->rmap->N,cm=C->rmap->N; 345 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 346 PetscScalar *ca=c->a,valtmp; 347 const PetscScalar *aa,*ba,*baj; 348 PetscInt nextb; 349 350 PetscFunctionBegin; 351 PetscCall(MatSeqAIJGetArrayRead(A,&aa)); 352 PetscCall(MatSeqAIJGetArrayRead(B,&ba)); 353 if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 354 PetscCall(PetscMalloc1(ci[cm]+1,&ca)); 355 c->a = ca; 356 c->free_a = PETSC_TRUE; 357 } 358 359 /* clean old values in C */ 360 PetscCall(PetscArrayzero(ca,ci[cm])); 361 /* Traverse A row-wise. */ 362 /* Build the ith row in C by summing over nonzero columns in A, */ 363 /* the rows of B corresponding to nonzeros of A. */ 364 for (i=0; i<am; i++) { 365 anzi = ai[i+1] - ai[i]; 366 cnzi = ci[i+1] - ci[i]; 367 for (j=0; j<anzi; j++) { 368 brow = aj[j]; 369 bnzi = bi[brow+1] - bi[brow]; 370 bjj = bj + bi[brow]; 371 baj = ba + bi[brow]; 372 /* perform sparse axpy */ 373 valtmp = aa[j]; 374 nextb = 0; 375 for (k=0; nextb<bnzi; k++) { 376 if (cj[k] == bjj[nextb]) { /* ccol == bcol */ 377 ca[k] += valtmp*baj[nextb++]; 378 } 379 } 380 flops += 2*bnzi; 381 } 382 aj += anzi; aa += anzi; 383 cj += cnzi; ca += cnzi; 384 } 385 #if defined(PETSC_HAVE_DEVICE) 386 if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 387 #endif 388 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 389 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 390 PetscCall(PetscLogFlops(flops)); 391 PetscCall(MatSeqAIJRestoreArrayRead(A,&aa)); 392 PetscCall(MatSeqAIJRestoreArrayRead(B,&ba)); 393 PetscFunctionReturn(0); 394 } 395 396 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C) 397 { 398 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 399 PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 400 PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 401 MatScalar *ca; 402 PetscReal afill; 403 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 404 PetscTable ta; 405 PetscFreeSpaceList free_space=NULL,current_space=NULL; 406 407 PetscFunctionBegin; 408 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 409 /*-----------------------------------------------------------------------------------------*/ 410 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 411 PetscCall(PetscMalloc1(am+2,&ci)); 412 ci[0] = 0; 413 414 /* create and initialize a linked list */ 415 PetscCall(PetscTableCreate(bn,bn,&ta)); 416 MatRowMergeMax_SeqAIJ(b,bm,ta); 417 PetscCall(PetscTableGetCount(ta,&Crmax)); 418 PetscCall(PetscTableDestroy(&ta)); 419 420 PetscCall(PetscLLCondensedCreate_fast(Crmax,&lnk)); 421 422 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 423 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 424 current_space = free_space; 425 426 /* Determine ci and cj */ 427 for (i=0; i<am; i++) { 428 anzi = ai[i+1] - ai[i]; 429 aj = a->j + ai[i]; 430 for (j=0; j<anzi; j++) { 431 brow = aj[j]; 432 bnzj = bi[brow+1] - bi[brow]; 433 bj = b->j + bi[brow]; 434 /* add non-zero cols of B into the sorted linked list lnk */ 435 PetscCall(PetscLLCondensedAddSorted_fast(bnzj,bj,lnk)); 436 } 437 /* add possible missing diagonal entry */ 438 if (C->force_diagonals) { 439 PetscCall(PetscLLCondensedAddSorted_fast(1,&i,lnk)); 440 } 441 cnzi = lnk[1]; 442 443 /* If free space is not available, make more free space */ 444 /* Double the amount of total space in the list */ 445 if (current_space->local_remaining<cnzi) { 446 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 447 ndouble++; 448 } 449 450 /* Copy data into free space, then initialize lnk */ 451 PetscCall(PetscLLCondensedClean_fast(cnzi,current_space->array,lnk)); 452 453 current_space->array += cnzi; 454 current_space->local_used += cnzi; 455 current_space->local_remaining -= cnzi; 456 457 ci[i+1] = ci[i] + cnzi; 458 } 459 460 /* Column indices are in the list of free space */ 461 /* Allocate space for cj, initialize cj, and */ 462 /* destroy list of free space and other temporary array(s) */ 463 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 464 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 465 PetscCall(PetscLLCondensedDestroy_fast(lnk)); 466 467 /* Allocate space for ca */ 468 PetscCall(PetscCalloc1(ci[am]+1,&ca)); 469 470 /* put together the new symbolic matrix */ 471 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C)); 472 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 473 474 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 475 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 476 c = (Mat_SeqAIJ*)(C->data); 477 c->free_a = PETSC_TRUE; 478 c->free_ij = PETSC_TRUE; 479 c->nonew = 0; 480 481 /* slower, less memory */ 482 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 483 484 /* set MatInfo */ 485 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 486 if (afill < 1.0) afill = 1.0; 487 C->info.mallocs = ndouble; 488 C->info.fill_ratio_given = fill; 489 C->info.fill_ratio_needed = afill; 490 491 #if defined(PETSC_USE_INFO) 492 if (ci[am]) { 493 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 494 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 495 } else { 496 PetscCall(PetscInfo(C,"Empty matrix product\n")); 497 } 498 #endif 499 PetscFunctionReturn(0); 500 } 501 502 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C) 503 { 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 505 PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 506 PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 507 MatScalar *ca; 508 PetscReal afill; 509 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 510 PetscTable ta; 511 PetscFreeSpaceList free_space=NULL,current_space=NULL; 512 513 PetscFunctionBegin; 514 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 515 /*---------------------------------------------------------------------------------------------*/ 516 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 517 PetscCall(PetscMalloc1(am+2,&ci)); 518 ci[0] = 0; 519 520 /* create and initialize a linked list */ 521 PetscCall(PetscTableCreate(bn,bn,&ta)); 522 MatRowMergeMax_SeqAIJ(b,bm,ta); 523 PetscCall(PetscTableGetCount(ta,&Crmax)); 524 PetscCall(PetscTableDestroy(&ta)); 525 PetscCall(PetscLLCondensedCreate_Scalable(Crmax,&lnk)); 526 527 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 528 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 529 current_space = free_space; 530 531 /* Determine ci and cj */ 532 for (i=0; i<am; i++) { 533 anzi = ai[i+1] - ai[i]; 534 aj = a->j + ai[i]; 535 for (j=0; j<anzi; j++) { 536 brow = aj[j]; 537 bnzj = bi[brow+1] - bi[brow]; 538 bj = b->j + bi[brow]; 539 /* add non-zero cols of B into the sorted linked list lnk */ 540 PetscCall(PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk)); 541 } 542 /* add possible missing diagonal entry */ 543 if (C->force_diagonals) { 544 PetscCall(PetscLLCondensedAddSorted_Scalable(1,&i,lnk)); 545 } 546 547 cnzi = lnk[0]; 548 549 /* If free space is not available, make more free space */ 550 /* Double the amount of total space in the list */ 551 if (current_space->local_remaining<cnzi) { 552 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 553 ndouble++; 554 } 555 556 /* Copy data into free space, then initialize lnk */ 557 PetscCall(PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk)); 558 559 current_space->array += cnzi; 560 current_space->local_used += cnzi; 561 current_space->local_remaining -= cnzi; 562 563 ci[i+1] = ci[i] + cnzi; 564 } 565 566 /* Column indices are in the list of free space */ 567 /* Allocate space for cj, initialize cj, and */ 568 /* destroy list of free space and other temporary array(s) */ 569 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 570 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 571 PetscCall(PetscLLCondensedDestroy_Scalable(lnk)); 572 573 /* Allocate space for ca */ 574 /*-----------------------*/ 575 PetscCall(PetscCalloc1(ci[am]+1,&ca)); 576 577 /* put together the new symbolic matrix */ 578 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C)); 579 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 580 581 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 582 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 583 c = (Mat_SeqAIJ*)(C->data); 584 c->free_a = PETSC_TRUE; 585 c->free_ij = PETSC_TRUE; 586 c->nonew = 0; 587 588 /* slower, less memory */ 589 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 590 591 /* set MatInfo */ 592 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 593 if (afill < 1.0) afill = 1.0; 594 C->info.mallocs = ndouble; 595 C->info.fill_ratio_given = fill; 596 C->info.fill_ratio_needed = afill; 597 598 #if defined(PETSC_USE_INFO) 599 if (ci[am]) { 600 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 601 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 602 } else { 603 PetscCall(PetscInfo(C,"Empty matrix product\n")); 604 } 605 #endif 606 PetscFunctionReturn(0); 607 } 608 609 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C) 610 { 611 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 612 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 613 PetscInt *ci,*cj,*bb; 614 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 615 PetscReal afill; 616 PetscInt i,j,col,ndouble = 0; 617 PetscFreeSpaceList free_space=NULL,current_space=NULL; 618 PetscHeap h; 619 620 PetscFunctionBegin; 621 /* Get ci and cj - by merging sorted rows using a heap */ 622 /*---------------------------------------------------------------------------------------------*/ 623 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 624 PetscCall(PetscMalloc1(am+2,&ci)); 625 ci[0] = 0; 626 627 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 628 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 629 current_space = free_space; 630 631 PetscCall(PetscHeapCreate(a->rmax,&h)); 632 PetscCall(PetscMalloc1(a->rmax,&bb)); 633 634 /* Determine ci and cj */ 635 for (i=0; i<am; i++) { 636 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 637 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 638 ci[i+1] = ci[i]; 639 /* Populate the min heap */ 640 for (j=0; j<anzi; j++) { 641 bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 642 if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 643 PetscCall(PetscHeapAdd(h,j,bj[bb[j]++])); 644 } 645 } 646 /* Pick off the min element, adding it to free space */ 647 PetscCall(PetscHeapPop(h,&j,&col)); 648 while (j >= 0) { 649 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 650 PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space)); 651 ndouble++; 652 } 653 *(current_space->array++) = col; 654 current_space->local_used++; 655 current_space->local_remaining--; 656 ci[i+1]++; 657 658 /* stash if anything else remains in this row of B */ 659 if (bb[j] < bi[acol[j]+1]) PetscCall(PetscHeapStash(h,j,bj[bb[j]++])); 660 while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 661 PetscInt j2,col2; 662 PetscCall(PetscHeapPeek(h,&j2,&col2)); 663 if (col2 != col) break; 664 PetscCall(PetscHeapPop(h,&j2,&col2)); 665 if (bb[j2] < bi[acol[j2]+1]) PetscCall(PetscHeapStash(h,j2,bj[bb[j2]++])); 666 } 667 /* Put any stashed elements back into the min heap */ 668 PetscCall(PetscHeapUnstash(h)); 669 PetscCall(PetscHeapPop(h,&j,&col)); 670 } 671 } 672 PetscCall(PetscFree(bb)); 673 PetscCall(PetscHeapDestroy(&h)); 674 675 /* Column indices are in the list of free space */ 676 /* Allocate space for cj, initialize cj, and */ 677 /* destroy list of free space and other temporary array(s) */ 678 PetscCall(PetscMalloc1(ci[am],&cj)); 679 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 680 681 /* put together the new symbolic matrix */ 682 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 683 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 684 685 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 686 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 687 c = (Mat_SeqAIJ*)(C->data); 688 c->free_a = PETSC_TRUE; 689 c->free_ij = PETSC_TRUE; 690 c->nonew = 0; 691 692 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 693 694 /* set MatInfo */ 695 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 696 if (afill < 1.0) afill = 1.0; 697 C->info.mallocs = ndouble; 698 C->info.fill_ratio_given = fill; 699 C->info.fill_ratio_needed = afill; 700 701 #if defined(PETSC_USE_INFO) 702 if (ci[am]) { 703 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 704 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 705 } else { 706 PetscCall(PetscInfo(C,"Empty matrix product\n")); 707 } 708 #endif 709 PetscFunctionReturn(0); 710 } 711 712 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C) 713 { 714 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 715 const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 716 PetscInt *ci,*cj,*bb; 717 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 718 PetscReal afill; 719 PetscInt i,j,col,ndouble = 0; 720 PetscFreeSpaceList free_space=NULL,current_space=NULL; 721 PetscHeap h; 722 PetscBT bt; 723 724 PetscFunctionBegin; 725 /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ 726 /*---------------------------------------------------------------------------------------------*/ 727 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 728 PetscCall(PetscMalloc1(am+2,&ci)); 729 ci[0] = 0; 730 731 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 732 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 733 734 current_space = free_space; 735 736 PetscCall(PetscHeapCreate(a->rmax,&h)); 737 PetscCall(PetscMalloc1(a->rmax,&bb)); 738 PetscCall(PetscBTCreate(bn,&bt)); 739 740 /* Determine ci and cj */ 741 for (i=0; i<am; i++) { 742 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 743 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 744 const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 745 ci[i+1] = ci[i]; 746 /* Populate the min heap */ 747 for (j=0; j<anzi; j++) { 748 PetscInt brow = acol[j]; 749 for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 750 PetscInt bcol = bj[bb[j]]; 751 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 752 PetscCall(PetscHeapAdd(h,j,bcol)); 753 bb[j]++; 754 break; 755 } 756 } 757 } 758 /* Pick off the min element, adding it to free space */ 759 PetscCall(PetscHeapPop(h,&j,&col)); 760 while (j >= 0) { 761 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 762 fptr = NULL; /* need PetscBTMemzero */ 763 PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space)); 764 ndouble++; 765 } 766 *(current_space->array++) = col; 767 current_space->local_used++; 768 current_space->local_remaining--; 769 ci[i+1]++; 770 771 /* stash if anything else remains in this row of B */ 772 for (; bb[j] < bi[acol[j]+1]; bb[j]++) { 773 PetscInt bcol = bj[bb[j]]; 774 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 775 PetscCall(PetscHeapAdd(h,j,bcol)); 776 bb[j]++; 777 break; 778 } 779 } 780 PetscCall(PetscHeapPop(h,&j,&col)); 781 } 782 if (fptr) { /* Clear the bits for this row */ 783 for (; fptr<current_space->array; fptr++) PetscCall(PetscBTClear(bt,*fptr)); 784 } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 785 PetscCall(PetscBTMemzero(bn,bt)); 786 } 787 } 788 PetscCall(PetscFree(bb)); 789 PetscCall(PetscHeapDestroy(&h)); 790 PetscCall(PetscBTDestroy(&bt)); 791 792 /* Column indices are in the list of free space */ 793 /* Allocate space for cj, initialize cj, and */ 794 /* destroy list of free space and other temporary array(s) */ 795 PetscCall(PetscMalloc1(ci[am],&cj)); 796 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 797 798 /* put together the new symbolic matrix */ 799 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 800 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 801 802 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 803 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 804 c = (Mat_SeqAIJ*)(C->data); 805 c->free_a = PETSC_TRUE; 806 c->free_ij = PETSC_TRUE; 807 c->nonew = 0; 808 809 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 810 811 /* set MatInfo */ 812 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 813 if (afill < 1.0) afill = 1.0; 814 C->info.mallocs = ndouble; 815 C->info.fill_ratio_given = fill; 816 C->info.fill_ratio_needed = afill; 817 818 #if defined(PETSC_USE_INFO) 819 if (ci[am]) { 820 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 821 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 822 } else { 823 PetscCall(PetscInfo(C,"Empty matrix product\n")); 824 } 825 #endif 826 PetscFunctionReturn(0); 827 } 828 829 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C) 830 { 831 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 832 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1; 833 PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9]; 834 PetscInt c_maxmem,a_maxrownnz=0,a_rownnz; 835 const PetscInt workcol[8]={0,1,2,3,4,5,6,7}; 836 const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 837 const PetscInt *brow_ptr[8],*brow_end[8]; 838 PetscInt window[8]; 839 PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows; 840 PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft; 841 PetscReal afill; 842 PetscInt *workj_L1,*workj_L2,*workj_L3; 843 PetscInt L1_nnz,L2_nnz; 844 845 /* Step 1: Get upper bound on memory required for allocation. 846 Because of the way virtual memory works, 847 only the memory pages that are actually needed will be physically allocated. */ 848 PetscFunctionBegin; 849 PetscCall(PetscMalloc1(am+1,&ci)); 850 for (i=0; i<am; i++) { 851 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 852 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 853 a_rownnz = 0; 854 for (k=0; k<anzi; ++k) { 855 a_rownnz += bi[acol[k]+1] - bi[acol[k]]; 856 if (a_rownnz > bn) { 857 a_rownnz = bn; 858 break; 859 } 860 } 861 a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz); 862 } 863 /* temporary work areas for merging rows */ 864 PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L1)); 865 PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L2)); 866 PetscCall(PetscMalloc1(a_maxrownnz,&workj_L3)); 867 868 /* This should be enough for almost all matrices. If not, memory is reallocated later. */ 869 c_maxmem = 8*(ai[am]+bi[bm]); 870 /* Step 2: Populate pattern for C */ 871 PetscCall(PetscMalloc1(c_maxmem,&cj)); 872 873 ci_nnz = 0; 874 ci[0] = 0; 875 worki_L1[0] = 0; 876 worki_L2[0] = 0; 877 for (i=0; i<am; i++) { 878 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 879 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 880 rowsleft = anzi; 881 inputcol_L1 = acol; 882 L2_nnz = 0; 883 L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ 884 worki_L2[1] = 0; 885 outputi_nnz = 0; 886 887 /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ 888 while (ci_nnz+a_maxrownnz > c_maxmem) { 889 c_maxmem *= 2; 890 ndouble++; 891 PetscCall(PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj)); 892 } 893 894 while (rowsleft) { 895 L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */ 896 L1_nrows = 0; 897 L1_nnz = 0; 898 inputcol = inputcol_L1; 899 inputi = bi; 900 inputj = bj; 901 902 /* The following macro is used to specialize for small rows in A. 903 This helps with compiler unrolling, improving performance substantially. 904 Input: inputj inputi inputcol bn 905 Output: outputj outputi_nnz */ 906 #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ 907 window_min = bn; \ 908 outputi_nnz = 0; \ 909 for (k=0; k<ANNZ; ++k) { \ 910 brow_ptr[k] = inputj + inputi[inputcol[k]]; \ 911 brow_end[k] = inputj + inputi[inputcol[k]+1]; \ 912 window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 913 window_min = PetscMin(window[k], window_min); \ 914 } \ 915 while (window_min < bn) { \ 916 outputj[outputi_nnz++] = window_min; \ 917 /* advance front and compute new minimum */ \ 918 old_window_min = window_min; \ 919 window_min = bn; \ 920 for (k=0; k<ANNZ; ++k) { \ 921 if (window[k] == old_window_min) { \ 922 brow_ptr[k]++; \ 923 window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 924 } \ 925 window_min = PetscMin(window[k], window_min); \ 926 } \ 927 } 928 929 /************** L E V E L 1 ***************/ 930 /* Merge up to 8 rows of B to L1 work array*/ 931 while (L1_rowsleft) { 932 outputi_nnz = 0; 933 if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ 934 else outputj = cj + ci_nnz; /* Merge directly to C */ 935 936 switch (L1_rowsleft) { 937 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 938 brow_end[0] = inputj + inputi[inputcol[0]+1]; 939 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 940 inputcol += L1_rowsleft; 941 rowsleft -= L1_rowsleft; 942 L1_rowsleft = 0; 943 break; 944 case 2: MatMatMultSymbolic_RowMergeMacro(2); 945 inputcol += L1_rowsleft; 946 rowsleft -= L1_rowsleft; 947 L1_rowsleft = 0; 948 break; 949 case 3: MatMatMultSymbolic_RowMergeMacro(3); 950 inputcol += L1_rowsleft; 951 rowsleft -= L1_rowsleft; 952 L1_rowsleft = 0; 953 break; 954 case 4: MatMatMultSymbolic_RowMergeMacro(4); 955 inputcol += L1_rowsleft; 956 rowsleft -= L1_rowsleft; 957 L1_rowsleft = 0; 958 break; 959 case 5: MatMatMultSymbolic_RowMergeMacro(5); 960 inputcol += L1_rowsleft; 961 rowsleft -= L1_rowsleft; 962 L1_rowsleft = 0; 963 break; 964 case 6: MatMatMultSymbolic_RowMergeMacro(6); 965 inputcol += L1_rowsleft; 966 rowsleft -= L1_rowsleft; 967 L1_rowsleft = 0; 968 break; 969 case 7: MatMatMultSymbolic_RowMergeMacro(7); 970 inputcol += L1_rowsleft; 971 rowsleft -= L1_rowsleft; 972 L1_rowsleft = 0; 973 break; 974 default: MatMatMultSymbolic_RowMergeMacro(8); 975 inputcol += 8; 976 rowsleft -= 8; 977 L1_rowsleft -= 8; 978 break; 979 } 980 inputcol_L1 = inputcol; 981 L1_nnz += outputi_nnz; 982 worki_L1[++L1_nrows] = L1_nnz; 983 } 984 985 /********************** L E V E L 2 ************************/ 986 /* Merge from L1 work array to either C or to L2 work array */ 987 if (anzi > 8) { 988 inputi = worki_L1; 989 inputj = workj_L1; 990 inputcol = workcol; 991 outputi_nnz = 0; 992 993 if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ 994 else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ 995 996 switch (L1_nrows) { 997 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 998 brow_end[0] = inputj + inputi[inputcol[0]+1]; 999 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1000 break; 1001 case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1002 case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1003 case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1004 case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1005 case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1006 case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1007 case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1008 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!"); 1009 } 1010 L2_nnz += outputi_nnz; 1011 worki_L2[++L2_nrows] = L2_nnz; 1012 1013 /************************ L E V E L 3 **********************/ 1014 /* Merge from L2 work array to either C or to L2 work array */ 1015 if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { 1016 inputi = worki_L2; 1017 inputj = workj_L2; 1018 inputcol = workcol; 1019 outputi_nnz = 0; 1020 if (rowsleft) outputj = workj_L3; 1021 else outputj = cj + ci_nnz; 1022 switch (L2_nrows) { 1023 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 1024 brow_end[0] = inputj + inputi[inputcol[0]+1]; 1025 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1026 break; 1027 case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1028 case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1029 case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1030 case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1031 case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1032 case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1033 case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1034 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!"); 1035 } 1036 L2_nrows = 1; 1037 L2_nnz = outputi_nnz; 1038 worki_L2[1] = outputi_nnz; 1039 /* Copy to workj_L2 */ 1040 if (rowsleft) { 1041 for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k]; 1042 } 1043 } 1044 } 1045 } /* while (rowsleft) */ 1046 #undef MatMatMultSymbolic_RowMergeMacro 1047 1048 /* terminate current row */ 1049 ci_nnz += outputi_nnz; 1050 ci[i+1] = ci_nnz; 1051 } 1052 1053 /* Step 3: Create the new symbolic matrix */ 1054 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 1055 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 1056 1057 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1058 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 1059 c = (Mat_SeqAIJ*)(C->data); 1060 c->free_a = PETSC_TRUE; 1061 c->free_ij = PETSC_TRUE; 1062 c->nonew = 0; 1063 1064 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1065 1066 /* set MatInfo */ 1067 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 1068 if (afill < 1.0) afill = 1.0; 1069 C->info.mallocs = ndouble; 1070 C->info.fill_ratio_given = fill; 1071 C->info.fill_ratio_needed = afill; 1072 1073 #if defined(PETSC_USE_INFO) 1074 if (ci[am]) { 1075 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 1076 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 1077 } else { 1078 PetscCall(PetscInfo(C,"Empty matrix product\n")); 1079 } 1080 #endif 1081 1082 /* Step 4: Free temporary work areas */ 1083 PetscCall(PetscFree(workj_L1)); 1084 PetscCall(PetscFree(workj_L2)); 1085 PetscCall(PetscFree(workj_L3)); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 /* concatenate unique entries and then sort */ 1090 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C) 1091 { 1092 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 1093 const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 1094 PetscInt *ci,*cj,bcol; 1095 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 1096 PetscReal afill; 1097 PetscInt i,j,ndouble = 0; 1098 PetscSegBuffer seg,segrow; 1099 char *seen; 1100 1101 PetscFunctionBegin; 1102 PetscCall(PetscMalloc1(am+1,&ci)); 1103 ci[0] = 0; 1104 1105 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 1106 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg)); 1107 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),100,&segrow)); 1108 PetscCall(PetscCalloc1(bn,&seen)); 1109 1110 /* Determine ci and cj */ 1111 for (i=0; i<am; i++) { 1112 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 1113 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 1114 PetscInt packlen = 0,*PETSC_RESTRICT crow; 1115 1116 /* Pack segrow */ 1117 for (j=0; j<anzi; j++) { 1118 PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k; 1119 for (k=bjstart; k<bjend; k++) { 1120 bcol = bj[k]; 1121 if (!seen[bcol]) { /* new entry */ 1122 PetscInt *PETSC_RESTRICT slot; 1123 PetscCall(PetscSegBufferGetInts(segrow,1,&slot)); 1124 *slot = bcol; 1125 seen[bcol] = 1; 1126 packlen++; 1127 } 1128 } 1129 } 1130 1131 /* Check i-th diagonal entry */ 1132 if (C->force_diagonals && !seen[i]) { 1133 PetscInt *PETSC_RESTRICT slot; 1134 PetscCall(PetscSegBufferGetInts(segrow,1,&slot)); 1135 *slot = i; 1136 seen[i] = 1; 1137 packlen++; 1138 } 1139 1140 PetscCall(PetscSegBufferGetInts(seg,packlen,&crow)); 1141 PetscCall(PetscSegBufferExtractTo(segrow,crow)); 1142 PetscCall(PetscSortInt(packlen,crow)); 1143 ci[i+1] = ci[i] + packlen; 1144 for (j=0; j<packlen; j++) seen[crow[j]] = 0; 1145 } 1146 PetscCall(PetscSegBufferDestroy(&segrow)); 1147 PetscCall(PetscFree(seen)); 1148 1149 /* Column indices are in the segmented buffer */ 1150 PetscCall(PetscSegBufferExtractAlloc(seg,&cj)); 1151 PetscCall(PetscSegBufferDestroy(&seg)); 1152 1153 /* put together the new symbolic matrix */ 1154 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 1155 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 1156 1157 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1158 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 1159 c = (Mat_SeqAIJ*)(C->data); 1160 c->free_a = PETSC_TRUE; 1161 c->free_ij = PETSC_TRUE; 1162 c->nonew = 0; 1163 1164 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1165 1166 /* set MatInfo */ 1167 afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5; 1168 if (afill < 1.0) afill = 1.0; 1169 C->info.mallocs = ndouble; 1170 C->info.fill_ratio_given = fill; 1171 C->info.fill_ratio_needed = afill; 1172 1173 #if defined(PETSC_USE_INFO) 1174 if (ci[am]) { 1175 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 1176 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 1177 } else { 1178 PetscCall(PetscInfo(C,"Empty matrix product\n")); 1179 } 1180 #endif 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data) 1185 { 1186 Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data; 1187 1188 PetscFunctionBegin; 1189 PetscCall(MatTransposeColoringDestroy(&abt->matcoloring)); 1190 PetscCall(MatDestroy(&abt->Bt_den)); 1191 PetscCall(MatDestroy(&abt->ABt_den)); 1192 PetscCall(PetscFree(abt)); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1197 { 1198 Mat Bt; 1199 Mat_MatMatTransMult *abt; 1200 Mat_Product *product = C->product; 1201 char *alg; 1202 1203 PetscFunctionBegin; 1204 PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1205 PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 1206 1207 /* create symbolic Bt */ 1208 PetscCall(MatTransposeSymbolic(B,&Bt)); 1209 PetscCall(MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs))); 1210 PetscCall(MatSetType(Bt,((PetscObject)A)->type_name)); 1211 1212 /* get symbolic C=A*Bt */ 1213 PetscCall(PetscStrallocpy(product->alg,&alg)); 1214 PetscCall(MatProductSetAlgorithm(C,"sorted")); /* set algorithm for C = A*Bt */ 1215 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C)); 1216 PetscCall(MatProductSetAlgorithm(C,alg)); /* resume original algorithm for ABt product */ 1217 PetscCall(PetscFree(alg)); 1218 1219 /* create a supporting struct for reuse intermediate dense matrices with matcoloring */ 1220 PetscCall(PetscNew(&abt)); 1221 1222 product->data = abt; 1223 product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 1224 1225 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 1226 1227 abt->usecoloring = PETSC_FALSE; 1228 PetscCall(PetscStrcmp(product->alg,"color",&abt->usecoloring)); 1229 if (abt->usecoloring) { 1230 /* Create MatTransposeColoring from symbolic C=A*B^T */ 1231 MatTransposeColoring matcoloring; 1232 MatColoring coloring; 1233 ISColoring iscoloring; 1234 Mat Bt_dense,C_dense; 1235 1236 /* inode causes memory problem */ 1237 PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE)); 1238 1239 PetscCall(MatColoringCreate(C,&coloring)); 1240 PetscCall(MatColoringSetDistance(coloring,2)); 1241 PetscCall(MatColoringSetType(coloring,MATCOLORINGSL)); 1242 PetscCall(MatColoringSetFromOptions(coloring)); 1243 PetscCall(MatColoringApply(coloring,&iscoloring)); 1244 PetscCall(MatColoringDestroy(&coloring)); 1245 PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring)); 1246 1247 abt->matcoloring = matcoloring; 1248 1249 PetscCall(ISColoringDestroy(&iscoloring)); 1250 1251 /* Create Bt_dense and C_dense = A*Bt_dense */ 1252 PetscCall(MatCreate(PETSC_COMM_SELF,&Bt_dense)); 1253 PetscCall(MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors)); 1254 PetscCall(MatSetType(Bt_dense,MATSEQDENSE)); 1255 PetscCall(MatSeqDenseSetPreallocation(Bt_dense,NULL)); 1256 1257 Bt_dense->assembled = PETSC_TRUE; 1258 abt->Bt_den = Bt_dense; 1259 1260 PetscCall(MatCreate(PETSC_COMM_SELF,&C_dense)); 1261 PetscCall(MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors)); 1262 PetscCall(MatSetType(C_dense,MATSEQDENSE)); 1263 PetscCall(MatSeqDenseSetPreallocation(C_dense,NULL)); 1264 1265 Bt_dense->assembled = PETSC_TRUE; 1266 abt->ABt_den = C_dense; 1267 1268 #if defined(PETSC_USE_INFO) 1269 { 1270 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 1271 PetscCall(PetscInfo(C,"Use coloring of C=A*B^T; B^T: %" PetscInt_FMT " %" PetscInt_FMT ", Bt_dense: %" PetscInt_FMT ",%" PetscInt_FMT "; Cnz %" PetscInt_FMT " / (cm*ncolors %" PetscInt_FMT ") = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(double)(((PetscReal)(c->nz))/((PetscReal)(A->rmap->n*matcoloring->ncolors))))); 1272 } 1273 #endif 1274 } 1275 /* clean up */ 1276 PetscCall(MatDestroy(&Bt)); 1277 PetscFunctionReturn(0); 1278 } 1279 1280 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1281 { 1282 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1283 PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 1284 PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 1285 PetscLogDouble flops=0.0; 1286 MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval; 1287 Mat_MatMatTransMult *abt; 1288 Mat_Product *product = C->product; 1289 1290 PetscFunctionBegin; 1291 PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1292 abt = (Mat_MatMatTransMult *)product->data; 1293 PetscCheck(abt,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1294 /* clear old values in C */ 1295 if (!c->a) { 1296 PetscCall(PetscCalloc1(ci[cm]+1,&ca)); 1297 c->a = ca; 1298 c->free_a = PETSC_TRUE; 1299 } else { 1300 ca = c->a; 1301 PetscCall(PetscArrayzero(ca,ci[cm]+1)); 1302 } 1303 1304 if (abt->usecoloring) { 1305 MatTransposeColoring matcoloring = abt->matcoloring; 1306 Mat Bt_dense,C_dense = abt->ABt_den; 1307 1308 /* Get Bt_dense by Apply MatTransposeColoring to B */ 1309 Bt_dense = abt->Bt_den; 1310 PetscCall(MatTransColoringApplySpToDen(matcoloring,B,Bt_dense)); 1311 1312 /* C_dense = A*Bt_dense */ 1313 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense)); 1314 1315 /* Recover C from C_dense */ 1316 PetscCall(MatTransColoringApplyDenToSp(matcoloring,C_dense,C)); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 for (i=0; i<cm; i++) { 1321 anzi = ai[i+1] - ai[i]; 1322 acol = aj + ai[i]; 1323 aval = aa + ai[i]; 1324 cnzi = ci[i+1] - ci[i]; 1325 ccol = cj + ci[i]; 1326 cval = ca + ci[i]; 1327 for (j=0; j<cnzi; j++) { 1328 brow = ccol[j]; 1329 bnzj = bi[brow+1] - bi[brow]; 1330 bcol = bj + bi[brow]; 1331 bval = ba + bi[brow]; 1332 1333 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 1334 nexta = 0; nextb = 0; 1335 while (nexta<anzi && nextb<bnzj) { 1336 while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; 1337 if (nexta == anzi) break; 1338 while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; 1339 if (nextb == bnzj) break; 1340 if (acol[nexta] == bcol[nextb]) { 1341 cval[j] += aval[nexta]*bval[nextb]; 1342 nexta++; nextb++; 1343 flops += 2; 1344 } 1345 } 1346 } 1347 } 1348 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1349 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1350 PetscCall(PetscLogFlops(flops)); 1351 PetscFunctionReturn(0); 1352 } 1353 1354 PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data) 1355 { 1356 Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data; 1357 1358 PetscFunctionBegin; 1359 PetscCall(MatDestroy(&atb->At)); 1360 if (atb->destroy) PetscCall((*atb->destroy)(atb->data)); 1361 PetscCall(PetscFree(atb)); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1366 { 1367 Mat At = NULL; 1368 Mat_Product *product = C->product; 1369 PetscBool flg,def,square; 1370 1371 PetscFunctionBegin; 1372 MatCheckProduct(C,4); 1373 square = (PetscBool)(A == B && A->symmetric == PETSC_BOOL3_TRUE); 1374 /* outerproduct */ 1375 PetscCall(PetscStrcmp(product->alg,"outerproduct",&flg)); 1376 if (flg) { 1377 /* create symbolic At */ 1378 if (!square) { 1379 PetscCall(MatTransposeSymbolic(A,&At)); 1380 PetscCall(MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs))); 1381 PetscCall(MatSetType(At,((PetscObject)A)->type_name)); 1382 } 1383 /* get symbolic C=At*B */ 1384 PetscCall(MatProductSetAlgorithm(C,"sorted")); 1385 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C)); 1386 1387 /* clean up */ 1388 if (!square) { 1389 PetscCall(MatDestroy(&At)); 1390 } 1391 1392 C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */ 1393 PetscCall(MatProductSetAlgorithm(C,"outerproduct")); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 /* matmatmult */ 1398 PetscCall(PetscStrcmp(product->alg,"default",&def)); 1399 PetscCall(PetscStrcmp(product->alg,"at*b",&flg)); 1400 if (flg || def) { 1401 Mat_MatTransMatMult *atb; 1402 1403 PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 1404 PetscCall(PetscNew(&atb)); 1405 if (!square) { 1406 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 1407 } 1408 PetscCall(MatProductSetAlgorithm(C,"sorted")); 1409 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C)); 1410 PetscCall(MatProductSetAlgorithm(C,"at*b")); 1411 product->data = atb; 1412 product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 1413 atb->At = At; 1414 1415 C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */ 1416 PetscFunctionReturn(0); 1417 } 1418 1419 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1420 } 1421 1422 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1423 { 1424 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1425 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1426 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1427 PetscLogDouble flops=0.0; 1428 MatScalar *aa=a->a,*ba,*ca,*caj; 1429 1430 PetscFunctionBegin; 1431 if (!c->a) { 1432 PetscCall(PetscCalloc1(ci[cm]+1,&ca)); 1433 1434 c->a = ca; 1435 c->free_a = PETSC_TRUE; 1436 } else { 1437 ca = c->a; 1438 PetscCall(PetscArrayzero(ca,ci[cm])); 1439 } 1440 1441 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1442 for (i=0; i<am; i++) { 1443 bj = b->j + bi[i]; 1444 ba = b->a + bi[i]; 1445 bnzi = bi[i+1] - bi[i]; 1446 anzi = ai[i+1] - ai[i]; 1447 for (j=0; j<anzi; j++) { 1448 nextb = 0; 1449 crow = *aj++; 1450 cjj = cj + ci[crow]; 1451 caj = ca + ci[crow]; 1452 /* perform sparse axpy operation. Note cjj includes bj. */ 1453 for (k=0; nextb<bnzi; k++) { 1454 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 1455 caj[k] += (*aa)*(*(ba+nextb)); 1456 nextb++; 1457 } 1458 } 1459 flops += 2*bnzi; 1460 aa++; 1461 } 1462 } 1463 1464 /* Assemble the final matrix and clean up */ 1465 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1466 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1467 PetscCall(PetscLogFlops(flops)); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C) 1472 { 1473 PetscFunctionBegin; 1474 PetscCall(MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C)); 1475 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 1476 PetscFunctionReturn(0); 1477 } 1478 1479 PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add) 1480 { 1481 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1482 PetscScalar *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4; 1483 const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av; 1484 const PetscInt *aj; 1485 PetscInt cm=C->rmap->n,cn=B->cmap->n,bm,am=A->rmap->n; 1486 PetscInt clda; 1487 PetscInt am4,bm4,col,i,j,n; 1488 1489 PetscFunctionBegin; 1490 if (!cm || !cn) PetscFunctionReturn(0); 1491 PetscCall(MatSeqAIJGetArrayRead(A,&av)); 1492 if (add) { 1493 PetscCall(MatDenseGetArray(C,&c)); 1494 } else { 1495 PetscCall(MatDenseGetArrayWrite(C,&c)); 1496 } 1497 PetscCall(MatDenseGetArrayRead(B,&b)); 1498 PetscCall(MatDenseGetLDA(B,&bm)); 1499 PetscCall(MatDenseGetLDA(C,&clda)); 1500 am4 = 4*clda; 1501 bm4 = 4*bm; 1502 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1503 c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda; 1504 for (col=0; col<(cn/4)*4; col += 4) { /* over columns of C */ 1505 for (i=0; i<am; i++) { /* over rows of A in those columns */ 1506 r1 = r2 = r3 = r4 = 0.0; 1507 n = a->i[i+1] - a->i[i]; 1508 aj = a->j + a->i[i]; 1509 aa = av + a->i[i]; 1510 for (j=0; j<n; j++) { 1511 const PetscScalar aatmp = aa[j]; 1512 const PetscInt ajtmp = aj[j]; 1513 r1 += aatmp*b1[ajtmp]; 1514 r2 += aatmp*b2[ajtmp]; 1515 r3 += aatmp*b3[ajtmp]; 1516 r4 += aatmp*b4[ajtmp]; 1517 } 1518 if (add) { 1519 c1[i] += r1; 1520 c2[i] += r2; 1521 c3[i] += r3; 1522 c4[i] += r4; 1523 } else { 1524 c1[i] = r1; 1525 c2[i] = r2; 1526 c3[i] = r3; 1527 c4[i] = r4; 1528 } 1529 } 1530 b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; 1531 c1 += am4; c2 += am4; c3 += am4; c4 += am4; 1532 } 1533 /* process remaining columns */ 1534 if (col != cn) { 1535 PetscInt rc = cn-col; 1536 1537 if (rc == 1) { 1538 for (i=0; i<am; i++) { 1539 r1 = 0.0; 1540 n = a->i[i+1] - a->i[i]; 1541 aj = a->j + a->i[i]; 1542 aa = av + a->i[i]; 1543 for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]]; 1544 if (add) c1[i] += r1; 1545 else c1[i] = r1; 1546 } 1547 } else if (rc == 2) { 1548 for (i=0; i<am; i++) { 1549 r1 = r2 = 0.0; 1550 n = a->i[i+1] - a->i[i]; 1551 aj = a->j + a->i[i]; 1552 aa = av + a->i[i]; 1553 for (j=0; j<n; j++) { 1554 const PetscScalar aatmp = aa[j]; 1555 const PetscInt ajtmp = aj[j]; 1556 r1 += aatmp*b1[ajtmp]; 1557 r2 += aatmp*b2[ajtmp]; 1558 } 1559 if (add) { 1560 c1[i] += r1; 1561 c2[i] += r2; 1562 } else { 1563 c1[i] = r1; 1564 c2[i] = r2; 1565 } 1566 } 1567 } else { 1568 for (i=0; i<am; i++) { 1569 r1 = r2 = r3 = 0.0; 1570 n = a->i[i+1] - a->i[i]; 1571 aj = a->j + a->i[i]; 1572 aa = av + a->i[i]; 1573 for (j=0; j<n; j++) { 1574 const PetscScalar aatmp = aa[j]; 1575 const PetscInt ajtmp = aj[j]; 1576 r1 += aatmp*b1[ajtmp]; 1577 r2 += aatmp*b2[ajtmp]; 1578 r3 += aatmp*b3[ajtmp]; 1579 } 1580 if (add) { 1581 c1[i] += r1; 1582 c2[i] += r2; 1583 c3[i] += r3; 1584 } else { 1585 c1[i] = r1; 1586 c2[i] = r2; 1587 c3[i] = r3; 1588 } 1589 } 1590 } 1591 } 1592 PetscCall(PetscLogFlops(cn*(2.0*a->nz))); 1593 if (add) { 1594 PetscCall(MatDenseRestoreArray(C,&c)); 1595 } else { 1596 PetscCall(MatDenseRestoreArrayWrite(C,&c)); 1597 } 1598 PetscCall(MatDenseRestoreArrayRead(B,&b)); 1599 PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 1600 PetscFunctionReturn(0); 1601 } 1602 1603 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1604 { 1605 PetscFunctionBegin; 1606 PetscCheck(B->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,B->rmap->n); 1607 PetscCheck(A->rmap->n == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,C->rmap->n,A->rmap->n); 1608 PetscCheck(B->cmap->n == C->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %" PetscInt_FMT " not equal columns in C %" PetscInt_FMT,B->cmap->n,C->cmap->n); 1609 1610 PetscCall(MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE)); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 /* ------------------------------------------------------- */ 1615 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C) 1616 { 1617 PetscFunctionBegin; 1618 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense; 1619 C->ops->productsymbolic = MatProductSymbolic_AB; 1620 PetscFunctionReturn(0); 1621 } 1622 1623 PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat); 1624 1625 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C) 1626 { 1627 PetscFunctionBegin; 1628 C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 1629 C->ops->productsymbolic = MatProductSymbolic_AtB; 1630 PetscFunctionReturn(0); 1631 } 1632 1633 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C) 1634 { 1635 PetscFunctionBegin; 1636 C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 1637 C->ops->productsymbolic = MatProductSymbolic_ABt; 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C) 1642 { 1643 Mat_Product *product = C->product; 1644 1645 PetscFunctionBegin; 1646 switch (product->type) { 1647 case MATPRODUCT_AB: 1648 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C)); 1649 break; 1650 case MATPRODUCT_AtB: 1651 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C)); 1652 break; 1653 case MATPRODUCT_ABt: 1654 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C)); 1655 break; 1656 default: 1657 break; 1658 } 1659 PetscFunctionReturn(0); 1660 } 1661 /* ------------------------------------------------------- */ 1662 static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C) 1663 { 1664 Mat_Product *product = C->product; 1665 Mat A = product->A; 1666 PetscBool baij; 1667 1668 PetscFunctionBegin; 1669 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij)); 1670 if (!baij) { /* A is seqsbaij */ 1671 PetscBool sbaij; 1672 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij)); 1673 PetscCheck(sbaij,PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format"); 1674 1675 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense; 1676 } else { /* A is seqbaij */ 1677 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense; 1678 } 1679 1680 C->ops->productsymbolic = MatProductSymbolic_AB; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C) 1685 { 1686 Mat_Product *product = C->product; 1687 1688 PetscFunctionBegin; 1689 MatCheckProduct(C,1); 1690 PetscCheck(product->A,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A"); 1691 if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric == PETSC_BOOL3_TRUE)) PetscCall(MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C)); 1692 PetscFunctionReturn(0); 1693 } 1694 1695 /* ------------------------------------------------------- */ 1696 static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C) 1697 { 1698 PetscFunctionBegin; 1699 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ; 1700 C->ops->productsymbolic = MatProductSymbolic_AB; 1701 PetscFunctionReturn(0); 1702 } 1703 1704 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C) 1705 { 1706 Mat_Product *product = C->product; 1707 1708 PetscFunctionBegin; 1709 if (product->type == MATPRODUCT_AB) { 1710 PetscCall(MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C)); 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 /* ------------------------------------------------------- */ 1715 1716 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1717 { 1718 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1719 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1720 PetscInt *bi = b->i,*bj=b->j; 1721 PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1722 MatScalar *btval,*btval_den,*ba=b->a; 1723 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1724 1725 PetscFunctionBegin; 1726 btval_den=btdense->v; 1727 PetscCall(PetscArrayzero(btval_den,m*n)); 1728 for (k=0; k<ncolors; k++) { 1729 ncolumns = coloring->ncolumns[k]; 1730 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1731 col = *(columns + colorforcol[k] + l); 1732 btcol = bj + bi[col]; 1733 btval = ba + bi[col]; 1734 anz = bi[col+1] - bi[col]; 1735 for (j=0; j<anz; j++) { 1736 brow = btcol[j]; 1737 btval_den[brow] = btval[j]; 1738 } 1739 } 1740 btval_den += m; 1741 } 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1746 { 1747 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1748 const PetscScalar *ca_den,*ca_den_ptr; 1749 PetscScalar *ca=csp->a; 1750 PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors; 1751 PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp; 1752 PetscInt nrows,*row,*idx; 1753 PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow; 1754 1755 PetscFunctionBegin; 1756 PetscCall(MatDenseGetArrayRead(Cden,&ca_den)); 1757 1758 if (brows > 0) { 1759 PetscInt *lstart,row_end,row_start; 1760 lstart = matcoloring->lstart; 1761 PetscCall(PetscArrayzero(lstart,ncolors)); 1762 1763 row_end = brows; 1764 if (row_end > m) row_end = m; 1765 for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */ 1766 ca_den_ptr = ca_den; 1767 for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */ 1768 nrows = matcoloring->nrows[k]; 1769 row = rows + colorforrow[k]; 1770 idx = den2sp + colorforrow[k]; 1771 for (l=lstart[k]; l<nrows; l++) { 1772 if (row[l] >= row_end) { 1773 lstart[k] = l; 1774 break; 1775 } else { 1776 ca[idx[l]] = ca_den_ptr[row[l]]; 1777 } 1778 } 1779 ca_den_ptr += m; 1780 } 1781 row_end += brows; 1782 if (row_end > m) row_end = m; 1783 } 1784 } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ 1785 ca_den_ptr = ca_den; 1786 for (k=0; k<ncolors; k++) { 1787 nrows = matcoloring->nrows[k]; 1788 row = rows + colorforrow[k]; 1789 idx = den2sp + colorforrow[k]; 1790 for (l=0; l<nrows; l++) { 1791 ca[idx[l]] = ca_den_ptr[row[l]]; 1792 } 1793 ca_den_ptr += m; 1794 } 1795 } 1796 1797 PetscCall(MatDenseRestoreArrayRead(Cden,&ca_den)); 1798 #if defined(PETSC_USE_INFO) 1799 if (matcoloring->brows > 0) { 1800 PetscCall(PetscInfo(Csp,"Loop over %" PetscInt_FMT " row blocks for den2sp\n",brows)); 1801 } else { 1802 PetscCall(PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n")); 1803 } 1804 #endif 1805 PetscFunctionReturn(0); 1806 } 1807 1808 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1809 { 1810 PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm; 1811 const PetscInt *is,*ci,*cj,*row_idx; 1812 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1813 IS *isa; 1814 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1815 PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i; 1816 PetscInt *colorforcol,*columns,*columns_i,brows; 1817 PetscBool flg; 1818 1819 PetscFunctionBegin; 1820 PetscCall(ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa)); 1821 1822 /* bs >1 is not being tested yet! */ 1823 Nbs = mat->cmap->N/bs; 1824 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1825 c->N = Nbs; 1826 c->m = c->M; 1827 c->rstart = 0; 1828 c->brows = 100; 1829 1830 c->ncolors = nis; 1831 PetscCall(PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow)); 1832 PetscCall(PetscMalloc1(csp->nz+1,&rows)); 1833 PetscCall(PetscMalloc1(csp->nz+1,&den2sp)); 1834 1835 brows = c->brows; 1836 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg)); 1837 if (flg) c->brows = brows; 1838 if (brows > 0) { 1839 PetscCall(PetscMalloc1(nis+1,&c->lstart)); 1840 } 1841 1842 colorforrow[0] = 0; 1843 rows_i = rows; 1844 den2sp_i = den2sp; 1845 1846 PetscCall(PetscMalloc1(nis+1,&colorforcol)); 1847 PetscCall(PetscMalloc1(Nbs+1,&columns)); 1848 1849 colorforcol[0] = 0; 1850 columns_i = columns; 1851 1852 /* get column-wise storage of mat */ 1853 PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL)); 1854 1855 cm = c->m; 1856 PetscCall(PetscMalloc1(cm+1,&rowhit)); 1857 PetscCall(PetscMalloc1(cm+1,&idxhit)); 1858 for (i=0; i<nis; i++) { /* loop over color */ 1859 PetscCall(ISGetLocalSize(isa[i],&n)); 1860 PetscCall(ISGetIndices(isa[i],&is)); 1861 1862 c->ncolumns[i] = n; 1863 if (n) PetscCall(PetscArraycpy(columns_i,is,n)); 1864 colorforcol[i+1] = colorforcol[i] + n; 1865 columns_i += n; 1866 1867 /* fast, crude version requires O(N*N) work */ 1868 PetscCall(PetscArrayzero(rowhit,cm)); 1869 1870 for (j=0; j<n; j++) { /* loop over columns*/ 1871 col = is[j]; 1872 row_idx = cj + ci[col]; 1873 m = ci[col+1] - ci[col]; 1874 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 1875 idxhit[*row_idx] = spidx[ci[col] + k]; 1876 rowhit[*row_idx++] = col + 1; 1877 } 1878 } 1879 /* count the number of hits */ 1880 nrows = 0; 1881 for (j=0; j<cm; j++) { 1882 if (rowhit[j]) nrows++; 1883 } 1884 c->nrows[i] = nrows; 1885 colorforrow[i+1] = colorforrow[i] + nrows; 1886 1887 nrows = 0; 1888 for (j=0; j<cm; j++) { /* loop over rows */ 1889 if (rowhit[j]) { 1890 rows_i[nrows] = j; 1891 den2sp_i[nrows] = idxhit[j]; 1892 nrows++; 1893 } 1894 } 1895 den2sp_i += nrows; 1896 1897 PetscCall(ISRestoreIndices(isa[i],&is)); 1898 rows_i += nrows; 1899 } 1900 PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL)); 1901 PetscCall(PetscFree(rowhit)); 1902 PetscCall(ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa)); 1903 PetscCheck(csp->nz == colorforrow[nis],PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %" PetscInt_FMT " != colorforrow[nis] %" PetscInt_FMT,csp->nz,colorforrow[nis]); 1904 1905 c->colorforrow = colorforrow; 1906 c->rows = rows; 1907 c->den2sp = den2sp; 1908 c->colorforcol = colorforcol; 1909 c->columns = columns; 1910 1911 PetscCall(PetscFree(idxhit)); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 /* --------------------------------------------------------------- */ 1916 static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C) 1917 { 1918 Mat_Product *product = C->product; 1919 Mat A=product->A,B=product->B; 1920 1921 PetscFunctionBegin; 1922 if (C->ops->mattransposemultnumeric) { 1923 /* Alg: "outerproduct" */ 1924 PetscCall((*C->ops->mattransposemultnumeric)(A,B,C)); 1925 } else { 1926 /* Alg: "matmatmult" -- C = At*B */ 1927 Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data; 1928 1929 PetscCheck(atb,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1930 if (atb->At) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */ 1931 PetscCall(MatTranspose(A,MAT_REUSE_MATRIX,&atb->At)); 1932 } 1933 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(atb->At ? atb->At : A,B,C)); 1934 } 1935 PetscFunctionReturn(0); 1936 } 1937 1938 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C) 1939 { 1940 Mat_Product *product = C->product; 1941 Mat A=product->A,B=product->B; 1942 PetscReal fill=product->fill; 1943 1944 PetscFunctionBegin; 1945 PetscCall(MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C)); 1946 1947 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /* --------------------------------------------------------------- */ 1952 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C) 1953 { 1954 Mat_Product *product = C->product; 1955 PetscInt alg = 0; /* default algorithm */ 1956 PetscBool flg = PETSC_FALSE; 1957 #if !defined(PETSC_HAVE_HYPRE) 1958 const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 1959 PetscInt nalg = 7; 1960 #else 1961 const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"}; 1962 PetscInt nalg = 8; 1963 #endif 1964 1965 PetscFunctionBegin; 1966 /* Set default algorithm */ 1967 PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 1968 if (flg) { 1969 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 1970 } 1971 1972 /* Get runtime option */ 1973 if (product->api_user) { 1974 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 1975 PetscCall(PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg)); 1976 PetscOptionsEnd(); 1977 } else { 1978 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 1979 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg)); 1980 PetscOptionsEnd(); 1981 } 1982 if (flg) { 1983 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 1984 } 1985 1986 C->ops->productsymbolic = MatProductSymbolic_AB; 1987 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 1988 PetscFunctionReturn(0); 1989 } 1990 1991 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C) 1992 { 1993 Mat_Product *product = C->product; 1994 PetscInt alg = 0; /* default algorithm */ 1995 PetscBool flg = PETSC_FALSE; 1996 const char *algTypes[3] = {"default","at*b","outerproduct"}; 1997 PetscInt nalg = 3; 1998 1999 PetscFunctionBegin; 2000 /* Get runtime option */ 2001 if (product->api_user) { 2002 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat"); 2003 PetscCall(PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2004 PetscOptionsEnd(); 2005 } else { 2006 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat"); 2007 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg)); 2008 PetscOptionsEnd(); 2009 } 2010 if (flg) { 2011 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2012 } 2013 2014 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C) 2019 { 2020 Mat_Product *product = C->product; 2021 PetscInt alg = 0; /* default algorithm */ 2022 PetscBool flg = PETSC_FALSE; 2023 const char *algTypes[2] = {"default","color"}; 2024 PetscInt nalg = 2; 2025 2026 PetscFunctionBegin; 2027 /* Set default algorithm */ 2028 PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 2029 if (!flg) { 2030 alg = 1; 2031 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2032 } 2033 2034 /* Get runtime option */ 2035 if (product->api_user) { 2036 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2037 PetscCall(PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2038 PetscOptionsEnd(); 2039 } else { 2040 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2041 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2042 PetscOptionsEnd(); 2043 } 2044 if (flg) { 2045 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2046 } 2047 2048 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 2049 C->ops->productsymbolic = MatProductSymbolic_ABt; 2050 PetscFunctionReturn(0); 2051 } 2052 2053 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C) 2054 { 2055 Mat_Product *product = C->product; 2056 PetscBool flg = PETSC_FALSE; 2057 PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */ 2058 #if !defined(PETSC_HAVE_HYPRE) 2059 const char *algTypes[2] = {"scalable","rap"}; 2060 PetscInt nalg = 2; 2061 #else 2062 const char *algTypes[3] = {"scalable","rap","hypre"}; 2063 PetscInt nalg = 3; 2064 #endif 2065 2066 PetscFunctionBegin; 2067 /* Set default algorithm */ 2068 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2069 if (flg) { 2070 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2071 } 2072 2073 /* Get runtime option */ 2074 if (product->api_user) { 2075 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 2076 PetscCall(PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg)); 2077 PetscOptionsEnd(); 2078 } else { 2079 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 2080 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg)); 2081 PetscOptionsEnd(); 2082 } 2083 if (flg) { 2084 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2085 } 2086 2087 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ; 2088 PetscFunctionReturn(0); 2089 } 2090 2091 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C) 2092 { 2093 Mat_Product *product = C->product; 2094 PetscBool flg = PETSC_FALSE; 2095 PetscInt alg = 0; /* default algorithm */ 2096 const char *algTypes[3] = {"r*a*rt","r*art","coloring_rart"}; 2097 PetscInt nalg = 3; 2098 2099 PetscFunctionBegin; 2100 /* Set default algorithm */ 2101 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2102 if (flg) { 2103 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2104 } 2105 2106 /* Get runtime option */ 2107 if (product->api_user) { 2108 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat"); 2109 PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg)); 2110 PetscOptionsEnd(); 2111 } else { 2112 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat"); 2113 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg)); 2114 PetscOptionsEnd(); 2115 } 2116 if (flg) { 2117 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2118 } 2119 2120 C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ; 2121 PetscFunctionReturn(0); 2122 } 2123 2124 /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */ 2125 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C) 2126 { 2127 Mat_Product *product = C->product; 2128 PetscInt alg = 0; /* default algorithm */ 2129 PetscBool flg = PETSC_FALSE; 2130 const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 2131 PetscInt nalg = 7; 2132 2133 PetscFunctionBegin; 2134 /* Set default algorithm */ 2135 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2136 if (flg) { 2137 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2138 } 2139 2140 /* Get runtime option */ 2141 if (product->api_user) { 2142 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat"); 2143 PetscCall(PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2144 PetscOptionsEnd(); 2145 } else { 2146 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat"); 2147 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg)); 2148 PetscOptionsEnd(); 2149 } 2150 if (flg) { 2151 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2152 } 2153 2154 C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ; 2155 C->ops->productsymbolic = MatProductSymbolic_ABC; 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C) 2160 { 2161 Mat_Product *product = C->product; 2162 2163 PetscFunctionBegin; 2164 switch (product->type) { 2165 case MATPRODUCT_AB: 2166 PetscCall(MatProductSetFromOptions_SeqAIJ_AB(C)); 2167 break; 2168 case MATPRODUCT_AtB: 2169 PetscCall(MatProductSetFromOptions_SeqAIJ_AtB(C)); 2170 break; 2171 case MATPRODUCT_ABt: 2172 PetscCall(MatProductSetFromOptions_SeqAIJ_ABt(C)); 2173 break; 2174 case MATPRODUCT_PtAP: 2175 PetscCall(MatProductSetFromOptions_SeqAIJ_PtAP(C)); 2176 break; 2177 case MATPRODUCT_RARt: 2178 PetscCall(MatProductSetFromOptions_SeqAIJ_RARt(C)); 2179 break; 2180 case MATPRODUCT_ABC: 2181 PetscCall(MatProductSetFromOptions_SeqAIJ_ABC(C)); 2182 break; 2183 default: 2184 break; 2185 } 2186 PetscFunctionReturn(0); 2187 } 2188