1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <petsc/private/isimpl.h> 11 #include <../src/mat/impls/dense/seq/dense.h> 12 13 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 14 { 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 if (C->ops->matmultnumeric) { 19 if (C->ops->matmultnumeric == MatMatMultNumeric_SeqAIJ_SeqAIJ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Recursive call"); 20 ierr = (*C->ops->matmultnumeric)(A,B,C);CHKERRQ(ierr); 21 } else { 22 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);CHKERRQ(ierr); 23 } 24 PetscFunctionReturn(0); 25 } 26 27 /* Modified from MatCreateSeqAIJWithArrays() */ 28 PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat) 29 { 30 PetscErrorCode ierr; 31 PetscInt ii; 32 Mat_SeqAIJ *aij; 33 PetscBool isseqaij; 34 35 PetscFunctionBegin; 36 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 37 ierr = MatSetSizes(mat,m,n,m,n);CHKERRQ(ierr); 38 39 if (!mtype) { 40 ierr = PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 41 if (!isseqaij) { ierr = MatSetType(mat,MATSEQAIJ);CHKERRQ(ierr); } 42 } else { 43 ierr = MatSetType(mat,mtype);CHKERRQ(ierr); 44 } 45 ierr = MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 46 aij = (Mat_SeqAIJ*)(mat)->data; 47 ierr = PetscMalloc1(m,&aij->imax);CHKERRQ(ierr); 48 ierr = PetscMalloc1(m,&aij->ilen);CHKERRQ(ierr); 49 50 aij->i = i; 51 aij->j = j; 52 aij->a = a; 53 aij->singlemalloc = PETSC_FALSE; 54 aij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 55 aij->free_a = PETSC_FALSE; 56 aij->free_ij = PETSC_FALSE; 57 58 for (ii=0; ii<m; ii++) { 59 aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii]; 60 } 61 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 66 { 67 PetscErrorCode ierr; 68 Mat_Product *product = C->product; 69 MatProductAlgorithm alg; 70 PetscBool flg; 71 72 PetscFunctionBegin; 73 if (product) { 74 alg = product->alg; 75 } else { 76 alg = "sorted"; 77 } 78 /* sorted */ 79 ierr = PetscStrcmp(alg,"sorted",&flg);CHKERRQ(ierr); 80 if (flg) { 81 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 /* scalable */ 86 ierr = PetscStrcmp(alg,"scalable",&flg);CHKERRQ(ierr); 87 if (flg) { 88 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 /* scalable_fast */ 93 ierr = PetscStrcmp(alg,"scalable_fast",&flg);CHKERRQ(ierr); 94 if (flg) { 95 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 /* heap */ 100 ierr = PetscStrcmp(alg,"heap",&flg);CHKERRQ(ierr); 101 if (flg) { 102 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr); 103 PetscFunctionReturn(0); 104 } 105 106 /* btheap */ 107 ierr = PetscStrcmp(alg,"btheap",&flg);CHKERRQ(ierr); 108 if (flg) { 109 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 /* llcondensed */ 114 ierr = PetscStrcmp(alg,"llcondensed",&flg);CHKERRQ(ierr); 115 if (flg) { 116 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 /* rowmerge */ 121 ierr = PetscStrcmp(alg,"rowmerge",&flg);CHKERRQ(ierr); 122 if (flg) { 123 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);CHKERRQ(ierr); 124 PetscFunctionReturn(0); 125 } 126 127 #if defined(PETSC_HAVE_HYPRE) 128 ierr = PetscStrcmp(alg,"hypre",&flg);CHKERRQ(ierr); 129 if (flg) { 130 ierr = MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 #endif 134 135 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 136 } 137 138 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C) 139 { 140 PetscErrorCode ierr; 141 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 142 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 143 PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 144 PetscReal afill; 145 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 146 PetscTable ta; 147 PetscBT lnkbt; 148 PetscFreeSpaceList free_space=NULL,current_space=NULL; 149 150 PetscFunctionBegin; 151 /* Get ci and cj */ 152 /*---------------*/ 153 /* Allocate ci array, arrays for fill computation and */ 154 /* free space for accumulating nonzero column info */ 155 ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 156 ci[0] = 0; 157 158 /* create and initialize a linked list */ 159 ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 160 MatRowMergeMax_SeqAIJ(b,bm,ta); 161 ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 162 ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 163 164 ierr = PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);CHKERRQ(ierr); 165 166 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 167 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 168 169 current_space = free_space; 170 171 /* Determine ci and cj */ 172 for (i=0; i<am; i++) { 173 anzi = ai[i+1] - ai[i]; 174 aj = a->j + ai[i]; 175 for (j=0; j<anzi; j++) { 176 brow = aj[j]; 177 bnzj = bi[brow+1] - bi[brow]; 178 bj = b->j + bi[brow]; 179 /* add non-zero cols of B into the sorted linked list lnk */ 180 ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr); 181 } 182 cnzi = lnk[0]; 183 184 /* If free space is not available, make more free space */ 185 /* Double the amount of total space in the list */ 186 if (current_space->local_remaining<cnzi) { 187 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 188 ndouble++; 189 } 190 191 /* Copy data into free space, then initialize lnk */ 192 ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr); 193 194 current_space->array += cnzi; 195 current_space->local_used += cnzi; 196 current_space->local_remaining -= cnzi; 197 198 ci[i+1] = ci[i] + cnzi; 199 } 200 201 /* Column indices are in the list of free space */ 202 /* Allocate space for cj, initialize cj, and */ 203 /* destroy list of free space and other temporary array(s) */ 204 ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 205 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 206 ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr); 207 208 /* put together the new symbolic matrix */ 209 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 210 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 211 212 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 213 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 214 c = (Mat_SeqAIJ*)(C->data); 215 c->free_a = PETSC_FALSE; 216 c->free_ij = PETSC_TRUE; 217 c->nonew = 0; 218 219 /* fast, needs non-scalable O(bn) array 'abdense' */ 220 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 221 222 /* set MatInfo */ 223 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 224 if (afill < 1.0) afill = 1.0; 225 c->maxnz = ci[am]; 226 c->nz = ci[am]; 227 C->info.mallocs = ndouble; 228 C->info.fill_ratio_given = fill; 229 C->info.fill_ratio_needed = afill; 230 231 #if defined(PETSC_USE_INFO) 232 if (ci[am]) { 233 ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 234 ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 235 } else { 236 ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 237 } 238 #endif 239 PetscFunctionReturn(0); 240 } 241 242 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C) 243 { 244 PetscErrorCode ierr; 245 PetscLogDouble flops=0.0; 246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 247 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 248 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 249 PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 250 PetscInt am =A->rmap->n,cm=C->rmap->n; 251 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 252 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp; 253 PetscScalar *ab_dense; 254 PetscContainer cab_dense; 255 256 PetscFunctionBegin; 257 if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 258 ierr = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 259 c->a = ca; 260 c->free_a = PETSC_TRUE; 261 } else ca = c->a; 262 263 /* TODO this should be done in the symbolic phase */ 264 /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers 265 that is hard to eradicate) */ 266 ierr = PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense);CHKERRQ(ierr); 267 if (!cab_dense) { 268 ierr = PetscMalloc1(B->cmap->N,&ab_dense);CHKERRQ(ierr); 269 ierr = PetscContainerCreate(PETSC_COMM_SELF,&cab_dense);CHKERRQ(ierr); 270 ierr = PetscContainerSetPointer(cab_dense,ab_dense);CHKERRQ(ierr); 271 ierr = PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 272 ierr = PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense);CHKERRQ(ierr); 273 ierr = PetscObjectDereference((PetscObject)cab_dense);CHKERRQ(ierr); 274 } 275 ierr = PetscContainerGetPointer(cab_dense,(void**)&ab_dense);CHKERRQ(ierr); 276 ierr = PetscArrayzero(ab_dense,B->cmap->N);CHKERRQ(ierr); 277 278 /* clean old values in C */ 279 ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 280 /* Traverse A row-wise. */ 281 /* Build the ith row in C by summing over nonzero columns in A, */ 282 /* the rows of B corresponding to nonzeros of A. */ 283 for (i=0; i<am; i++) { 284 anzi = ai[i+1] - ai[i]; 285 for (j=0; j<anzi; j++) { 286 brow = aj[j]; 287 bnzi = bi[brow+1] - bi[brow]; 288 bjj = bj + bi[brow]; 289 baj = ba + bi[brow]; 290 /* perform dense axpy */ 291 valtmp = aa[j]; 292 for (k=0; k<bnzi; k++) { 293 ab_dense[bjj[k]] += valtmp*baj[k]; 294 } 295 flops += 2*bnzi; 296 } 297 aj += anzi; aa += anzi; 298 299 cnzi = ci[i+1] - ci[i]; 300 for (k=0; k<cnzi; k++) { 301 ca[k] += ab_dense[cj[k]]; 302 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 303 } 304 flops += cnzi; 305 cj += cnzi; ca += cnzi; 306 } 307 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 308 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 309 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 314 { 315 PetscErrorCode ierr; 316 PetscLogDouble flops=0.0; 317 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 318 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 319 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 320 PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 321 PetscInt am = A->rmap->N,cm=C->rmap->N; 322 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 323 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 324 PetscInt nextb; 325 326 PetscFunctionBegin; 327 if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 328 ierr = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 329 c->a = ca; 330 c->free_a = PETSC_TRUE; 331 } 332 333 /* clean old values in C */ 334 ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 335 /* Traverse A row-wise. */ 336 /* Build the ith row in C by summing over nonzero columns in A, */ 337 /* the rows of B corresponding to nonzeros of A. */ 338 for (i=0; i<am; i++) { 339 anzi = ai[i+1] - ai[i]; 340 cnzi = ci[i+1] - ci[i]; 341 for (j=0; j<anzi; j++) { 342 brow = aj[j]; 343 bnzi = bi[brow+1] - bi[brow]; 344 bjj = bj + bi[brow]; 345 baj = ba + bi[brow]; 346 /* perform sparse axpy */ 347 valtmp = aa[j]; 348 nextb = 0; 349 for (k=0; nextb<bnzi; k++) { 350 if (cj[k] == bjj[nextb]) { /* ccol == bcol */ 351 ca[k] += valtmp*baj[nextb++]; 352 } 353 } 354 flops += 2*bnzi; 355 } 356 aj += anzi; aa += anzi; 357 cj += cnzi; ca += cnzi; 358 } 359 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 360 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 361 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 362 PetscFunctionReturn(0); 363 } 364 365 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C) 366 { 367 PetscErrorCode ierr; 368 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 369 PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 370 PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 371 MatScalar *ca; 372 PetscReal afill; 373 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 374 PetscTable ta; 375 PetscFreeSpaceList free_space=NULL,current_space=NULL; 376 377 PetscFunctionBegin; 378 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 379 /*-----------------------------------------------------------------------------------------*/ 380 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 381 ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 382 ci[0] = 0; 383 384 /* create and initialize a linked list */ 385 ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 386 MatRowMergeMax_SeqAIJ(b,bm,ta); 387 ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 388 ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 389 390 ierr = PetscLLCondensedCreate_fast(Crmax,&lnk);CHKERRQ(ierr); 391 392 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 393 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 394 current_space = free_space; 395 396 /* Determine ci and cj */ 397 for (i=0; i<am; i++) { 398 anzi = ai[i+1] - ai[i]; 399 aj = a->j + ai[i]; 400 for (j=0; j<anzi; j++) { 401 brow = aj[j]; 402 bnzj = bi[brow+1] - bi[brow]; 403 bj = b->j + bi[brow]; 404 /* add non-zero cols of B into the sorted linked list lnk */ 405 ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr); 406 } 407 cnzi = lnk[1]; 408 409 /* If free space is not available, make more free space */ 410 /* Double the amount of total space in the list */ 411 if (current_space->local_remaining<cnzi) { 412 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 413 ndouble++; 414 } 415 416 /* Copy data into free space, then initialize lnk */ 417 ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr); 418 419 current_space->array += cnzi; 420 current_space->local_used += cnzi; 421 current_space->local_remaining -= cnzi; 422 423 ci[i+1] = ci[i] + cnzi; 424 } 425 426 /* Column indices are in the list of free space */ 427 /* Allocate space for cj, initialize cj, and */ 428 /* destroy list of free space and other temporary array(s) */ 429 ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 430 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 431 ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr); 432 433 /* Allocate space for ca */ 434 ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr); 435 436 /* put together the new symbolic matrix */ 437 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 438 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 439 440 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 441 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 442 c = (Mat_SeqAIJ*)(C->data); 443 c->free_a = PETSC_TRUE; 444 c->free_ij = PETSC_TRUE; 445 c->nonew = 0; 446 447 /* slower, less memory */ 448 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 449 450 /* set MatInfo */ 451 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 452 if (afill < 1.0) afill = 1.0; 453 c->maxnz = ci[am]; 454 c->nz = ci[am]; 455 C->info.mallocs = ndouble; 456 C->info.fill_ratio_given = fill; 457 C->info.fill_ratio_needed = afill; 458 459 #if defined(PETSC_USE_INFO) 460 if (ci[am]) { 461 ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 462 ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 463 } else { 464 ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 465 } 466 #endif 467 PetscFunctionReturn(0); 468 } 469 470 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C) 471 { 472 PetscErrorCode ierr; 473 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 474 PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 475 PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 476 MatScalar *ca; 477 PetscReal afill; 478 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 479 PetscTable ta; 480 PetscFreeSpaceList free_space=NULL,current_space=NULL; 481 482 PetscFunctionBegin; 483 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 484 /*---------------------------------------------------------------------------------------------*/ 485 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 486 ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 487 ci[0] = 0; 488 489 /* create and initialize a linked list */ 490 ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 491 MatRowMergeMax_SeqAIJ(b,bm,ta); 492 ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 493 ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 494 ierr = PetscLLCondensedCreate_Scalable(Crmax,&lnk);CHKERRQ(ierr); 495 496 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 497 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 498 current_space = free_space; 499 500 /* Determine ci and cj */ 501 for (i=0; i<am; i++) { 502 anzi = ai[i+1] - ai[i]; 503 aj = a->j + ai[i]; 504 for (j=0; j<anzi; j++) { 505 brow = aj[j]; 506 bnzj = bi[brow+1] - bi[brow]; 507 bj = b->j + bi[brow]; 508 /* add non-zero cols of B into the sorted linked list lnk */ 509 ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr); 510 } 511 cnzi = lnk[0]; 512 513 /* If free space is not available, make more free space */ 514 /* Double the amount of total space in the list */ 515 if (current_space->local_remaining<cnzi) { 516 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 517 ndouble++; 518 } 519 520 /* Copy data into free space, then initialize lnk */ 521 ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr); 522 523 current_space->array += cnzi; 524 current_space->local_used += cnzi; 525 current_space->local_remaining -= cnzi; 526 527 ci[i+1] = ci[i] + cnzi; 528 } 529 530 /* Column indices are in the list of free space */ 531 /* Allocate space for cj, initialize cj, and */ 532 /* destroy list of free space and other temporary array(s) */ 533 ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 534 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 535 ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr); 536 537 /* Allocate space for ca */ 538 /*-----------------------*/ 539 ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr); 540 541 /* put together the new symbolic matrix */ 542 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 543 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 544 545 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 546 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 547 c = (Mat_SeqAIJ*)(C->data); 548 c->free_a = PETSC_TRUE; 549 c->free_ij = PETSC_TRUE; 550 c->nonew = 0; 551 552 /* slower, less memory */ 553 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 554 555 /* set MatInfo */ 556 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 557 if (afill < 1.0) afill = 1.0; 558 c->maxnz = ci[am]; 559 c->nz = ci[am]; 560 C->info.mallocs = ndouble; 561 C->info.fill_ratio_given = fill; 562 C->info.fill_ratio_needed = afill; 563 564 #if defined(PETSC_USE_INFO) 565 if (ci[am]) { 566 ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 567 ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 568 } else { 569 ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 570 } 571 #endif 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C) 576 { 577 PetscErrorCode ierr; 578 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 579 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 580 PetscInt *ci,*cj,*bb; 581 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 582 PetscReal afill; 583 PetscInt i,j,col,ndouble = 0; 584 PetscFreeSpaceList free_space=NULL,current_space=NULL; 585 PetscHeap h; 586 587 PetscFunctionBegin; 588 /* Get ci and cj - by merging sorted rows using a heap */ 589 /*---------------------------------------------------------------------------------------------*/ 590 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 591 ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 592 ci[0] = 0; 593 594 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 595 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 596 current_space = free_space; 597 598 ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 599 ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr); 600 601 /* Determine ci and cj */ 602 for (i=0; i<am; i++) { 603 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 604 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 605 ci[i+1] = ci[i]; 606 /* Populate the min heap */ 607 for (j=0; j<anzi; j++) { 608 bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 609 if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 610 ierr = PetscHeapAdd(h,j,bj[bb[j]++]);CHKERRQ(ierr); 611 } 612 } 613 /* Pick off the min element, adding it to free space */ 614 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 615 while (j >= 0) { 616 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 617 ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space);CHKERRQ(ierr); 618 ndouble++; 619 } 620 *(current_space->array++) = col; 621 current_space->local_used++; 622 current_space->local_remaining--; 623 ci[i+1]++; 624 625 /* stash if anything else remains in this row of B */ 626 if (bb[j] < bi[acol[j]+1]) {ierr = PetscHeapStash(h,j,bj[bb[j]++]);CHKERRQ(ierr);} 627 while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 628 PetscInt j2,col2; 629 ierr = PetscHeapPeek(h,&j2,&col2);CHKERRQ(ierr); 630 if (col2 != col) break; 631 ierr = PetscHeapPop(h,&j2,&col2);CHKERRQ(ierr); 632 if (bb[j2] < bi[acol[j2]+1]) {ierr = PetscHeapStash(h,j2,bj[bb[j2]++]);CHKERRQ(ierr);} 633 } 634 /* Put any stashed elements back into the min heap */ 635 ierr = PetscHeapUnstash(h);CHKERRQ(ierr); 636 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 637 } 638 } 639 ierr = PetscFree(bb);CHKERRQ(ierr); 640 ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 641 642 /* Column indices are in the list of free space */ 643 /* Allocate space for cj, initialize cj, and */ 644 /* destroy list of free space and other temporary array(s) */ 645 ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr); 646 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 647 648 /* put together the new symbolic matrix */ 649 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 650 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 651 652 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 653 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 654 c = (Mat_SeqAIJ*)(C->data); 655 c->free_a = PETSC_TRUE; 656 c->free_ij = PETSC_TRUE; 657 c->nonew = 0; 658 659 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 660 661 /* set MatInfo */ 662 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 663 if (afill < 1.0) afill = 1.0; 664 c->maxnz = ci[am]; 665 c->nz = ci[am]; 666 C->info.mallocs = ndouble; 667 C->info.fill_ratio_given = fill; 668 C->info.fill_ratio_needed = afill; 669 670 #if defined(PETSC_USE_INFO) 671 if (ci[am]) { 672 ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 673 ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 674 } else { 675 ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 676 } 677 #endif 678 PetscFunctionReturn(0); 679 } 680 681 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C) 682 { 683 PetscErrorCode ierr; 684 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 685 const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 686 PetscInt *ci,*cj,*bb; 687 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 688 PetscReal afill; 689 PetscInt i,j,col,ndouble = 0; 690 PetscFreeSpaceList free_space=NULL,current_space=NULL; 691 PetscHeap h; 692 PetscBT bt; 693 694 PetscFunctionBegin; 695 /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ 696 /*---------------------------------------------------------------------------------------------*/ 697 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 698 ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 699 ci[0] = 0; 700 701 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 702 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 703 704 current_space = free_space; 705 706 ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 707 ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr); 708 ierr = PetscBTCreate(bn,&bt);CHKERRQ(ierr); 709 710 /* Determine ci and cj */ 711 for (i=0; i<am; i++) { 712 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 713 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 714 const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 715 ci[i+1] = ci[i]; 716 /* Populate the min heap */ 717 for (j=0; j<anzi; j++) { 718 PetscInt brow = acol[j]; 719 for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 720 PetscInt bcol = bj[bb[j]]; 721 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 722 ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 723 bb[j]++; 724 break; 725 } 726 } 727 } 728 /* Pick off the min element, adding it to free space */ 729 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 730 while (j >= 0) { 731 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 732 fptr = NULL; /* need PetscBTMemzero */ 733 ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space);CHKERRQ(ierr); 734 ndouble++; 735 } 736 *(current_space->array++) = col; 737 current_space->local_used++; 738 current_space->local_remaining--; 739 ci[i+1]++; 740 741 /* stash if anything else remains in this row of B */ 742 for (; bb[j] < bi[acol[j]+1]; bb[j]++) { 743 PetscInt bcol = bj[bb[j]]; 744 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 745 ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 746 bb[j]++; 747 break; 748 } 749 } 750 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 751 } 752 if (fptr) { /* Clear the bits for this row */ 753 for (; fptr<current_space->array; fptr++) {ierr = PetscBTClear(bt,*fptr);CHKERRQ(ierr);} 754 } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 755 ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 756 } 757 } 758 ierr = PetscFree(bb);CHKERRQ(ierr); 759 ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 760 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 761 762 /* Column indices are in the list of free space */ 763 /* Allocate space for cj, initialize cj, and */ 764 /* destroy list of free space and other temporary array(s) */ 765 ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr); 766 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 767 768 /* put together the new symbolic matrix */ 769 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 770 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 771 772 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 773 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 774 c = (Mat_SeqAIJ*)(C->data); 775 c->free_a = PETSC_TRUE; 776 c->free_ij = PETSC_TRUE; 777 c->nonew = 0; 778 779 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 780 781 /* set MatInfo */ 782 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 783 if (afill < 1.0) afill = 1.0; 784 c->maxnz = ci[am]; 785 c->nz = ci[am]; 786 C->info.mallocs = ndouble; 787 C->info.fill_ratio_given = fill; 788 C->info.fill_ratio_needed = afill; 789 790 #if defined(PETSC_USE_INFO) 791 if (ci[am]) { 792 ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 793 ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 794 } else { 795 ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 796 } 797 #endif 798 PetscFunctionReturn(0); 799 } 800 801 802 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C) 803 { 804 PetscErrorCode ierr; 805 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 806 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1; 807 PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9]; 808 PetscInt c_maxmem,a_maxrownnz=0,a_rownnz; 809 const PetscInt workcol[8]={0,1,2,3,4,5,6,7}; 810 const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 811 const PetscInt *brow_ptr[8],*brow_end[8]; 812 PetscInt window[8]; 813 PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows; 814 PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft; 815 PetscReal afill; 816 PetscInt *workj_L1,*workj_L2,*workj_L3; 817 PetscInt L1_nnz,L2_nnz; 818 819 /* Step 1: Get upper bound on memory required for allocation. 820 Because of the way virtual memory works, 821 only the memory pages that are actually needed will be physically allocated. */ 822 PetscFunctionBegin; 823 ierr = PetscMalloc1(am+1,&ci);CHKERRQ(ierr); 824 for (i=0; i<am; i++) { 825 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 826 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 827 a_rownnz = 0; 828 for (k=0; k<anzi; ++k) { 829 a_rownnz += bi[acol[k]+1] - bi[acol[k]]; 830 if (a_rownnz > bn) { 831 a_rownnz = bn; 832 break; 833 } 834 } 835 a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz); 836 } 837 /* temporary work areas for merging rows */ 838 ierr = PetscMalloc1(a_maxrownnz*8,&workj_L1);CHKERRQ(ierr); 839 ierr = PetscMalloc1(a_maxrownnz*8,&workj_L2);CHKERRQ(ierr); 840 ierr = PetscMalloc1(a_maxrownnz,&workj_L3);CHKERRQ(ierr); 841 842 /* This should be enough for almost all matrices. If not, memory is reallocated later. */ 843 c_maxmem = 8*(ai[am]+bi[bm]); 844 /* Step 2: Populate pattern for C */ 845 ierr = PetscMalloc1(c_maxmem,&cj);CHKERRQ(ierr); 846 847 ci_nnz = 0; 848 ci[0] = 0; 849 worki_L1[0] = 0; 850 worki_L2[0] = 0; 851 for (i=0; i<am; i++) { 852 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 853 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 854 rowsleft = anzi; 855 inputcol_L1 = acol; 856 L2_nnz = 0; 857 L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ 858 worki_L2[1] = 0; 859 outputi_nnz = 0; 860 861 /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ 862 while (ci_nnz+a_maxrownnz > c_maxmem) { 863 c_maxmem *= 2; 864 ndouble++; 865 ierr = PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj);CHKERRQ(ierr); 866 } 867 868 while (rowsleft) { 869 L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */ 870 L1_nrows = 0; 871 L1_nnz = 0; 872 inputcol = inputcol_L1; 873 inputi = bi; 874 inputj = bj; 875 876 /* The following macro is used to specialize for small rows in A. 877 This helps with compiler unrolling, improving performance substantially. 878 Input: inputj inputi inputcol bn 879 Output: outputj outputi_nnz */ 880 #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ 881 window_min = bn; \ 882 outputi_nnz = 0; \ 883 for (k=0; k<ANNZ; ++k) { \ 884 brow_ptr[k] = inputj + inputi[inputcol[k]]; \ 885 brow_end[k] = inputj + inputi[inputcol[k]+1]; \ 886 window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 887 window_min = PetscMin(window[k], window_min); \ 888 } \ 889 while (window_min < bn) { \ 890 outputj[outputi_nnz++] = window_min; \ 891 /* advance front and compute new minimum */ \ 892 old_window_min = window_min; \ 893 window_min = bn; \ 894 for (k=0; k<ANNZ; ++k) { \ 895 if (window[k] == old_window_min) { \ 896 brow_ptr[k]++; \ 897 window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 898 } \ 899 window_min = PetscMin(window[k], window_min); \ 900 } \ 901 } 902 903 /************** L E V E L 1 ***************/ 904 /* Merge up to 8 rows of B to L1 work array*/ 905 while (L1_rowsleft) { 906 outputi_nnz = 0; 907 if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ 908 else outputj = cj + ci_nnz; /* Merge directly to C */ 909 910 switch (L1_rowsleft) { 911 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 912 brow_end[0] = inputj + inputi[inputcol[0]+1]; 913 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 914 inputcol += L1_rowsleft; 915 rowsleft -= L1_rowsleft; 916 L1_rowsleft = 0; 917 break; 918 case 2: MatMatMultSymbolic_RowMergeMacro(2); 919 inputcol += L1_rowsleft; 920 rowsleft -= L1_rowsleft; 921 L1_rowsleft = 0; 922 break; 923 case 3: MatMatMultSymbolic_RowMergeMacro(3); 924 inputcol += L1_rowsleft; 925 rowsleft -= L1_rowsleft; 926 L1_rowsleft = 0; 927 break; 928 case 4: MatMatMultSymbolic_RowMergeMacro(4); 929 inputcol += L1_rowsleft; 930 rowsleft -= L1_rowsleft; 931 L1_rowsleft = 0; 932 break; 933 case 5: MatMatMultSymbolic_RowMergeMacro(5); 934 inputcol += L1_rowsleft; 935 rowsleft -= L1_rowsleft; 936 L1_rowsleft = 0; 937 break; 938 case 6: MatMatMultSymbolic_RowMergeMacro(6); 939 inputcol += L1_rowsleft; 940 rowsleft -= L1_rowsleft; 941 L1_rowsleft = 0; 942 break; 943 case 7: MatMatMultSymbolic_RowMergeMacro(7); 944 inputcol += L1_rowsleft; 945 rowsleft -= L1_rowsleft; 946 L1_rowsleft = 0; 947 break; 948 default: MatMatMultSymbolic_RowMergeMacro(8); 949 inputcol += 8; 950 rowsleft -= 8; 951 L1_rowsleft -= 8; 952 break; 953 } 954 inputcol_L1 = inputcol; 955 L1_nnz += outputi_nnz; 956 worki_L1[++L1_nrows] = L1_nnz; 957 } 958 959 /********************** L E V E L 2 ************************/ 960 /* Merge from L1 work array to either C or to L2 work array */ 961 if (anzi > 8) { 962 inputi = worki_L1; 963 inputj = workj_L1; 964 inputcol = workcol; 965 outputi_nnz = 0; 966 967 if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ 968 else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ 969 970 switch (L1_nrows) { 971 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 972 brow_end[0] = inputj + inputi[inputcol[0]+1]; 973 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 974 break; 975 case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 976 case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 977 case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 978 case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 979 case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 980 case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 981 case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 982 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!"); 983 } 984 L2_nnz += outputi_nnz; 985 worki_L2[++L2_nrows] = L2_nnz; 986 987 /************************ L E V E L 3 **********************/ 988 /* Merge from L2 work array to either C or to L2 work array */ 989 if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { 990 inputi = worki_L2; 991 inputj = workj_L2; 992 inputcol = workcol; 993 outputi_nnz = 0; 994 if (rowsleft) outputj = workj_L3; 995 else outputj = cj + ci_nnz; 996 switch (L2_nrows) { 997 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 998 brow_end[0] = inputj + inputi[inputcol[0]+1]; 999 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1000 break; 1001 case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1002 case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1003 case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1004 case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1005 case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1006 case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1007 case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1008 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!"); 1009 } 1010 L2_nrows = 1; 1011 L2_nnz = outputi_nnz; 1012 worki_L2[1] = outputi_nnz; 1013 /* Copy to workj_L2 */ 1014 if (rowsleft) { 1015 for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k]; 1016 } 1017 } 1018 } 1019 } /* while (rowsleft) */ 1020 #undef MatMatMultSymbolic_RowMergeMacro 1021 1022 /* terminate current row */ 1023 ci_nnz += outputi_nnz; 1024 ci[i+1] = ci_nnz; 1025 } 1026 1027 /* Step 3: Create the new symbolic matrix */ 1028 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 1029 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 1030 1031 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1032 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 1033 c = (Mat_SeqAIJ*)(C->data); 1034 c->free_a = PETSC_TRUE; 1035 c->free_ij = PETSC_TRUE; 1036 c->nonew = 0; 1037 1038 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1039 1040 /* set MatInfo */ 1041 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 1042 if (afill < 1.0) afill = 1.0; 1043 c->maxnz = ci[am]; 1044 c->nz = ci[am]; 1045 C->info.mallocs = ndouble; 1046 C->info.fill_ratio_given = fill; 1047 C->info.fill_ratio_needed = afill; 1048 1049 #if defined(PETSC_USE_INFO) 1050 if (ci[am]) { 1051 ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 1052 ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 1053 } else { 1054 ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 1055 } 1056 #endif 1057 1058 /* Step 4: Free temporary work areas */ 1059 ierr = PetscFree(workj_L1);CHKERRQ(ierr); 1060 ierr = PetscFree(workj_L2);CHKERRQ(ierr); 1061 ierr = PetscFree(workj_L3);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 /* concatenate unique entries and then sort */ 1066 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C) 1067 { 1068 PetscErrorCode ierr; 1069 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 1070 const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 1071 PetscInt *ci,*cj; 1072 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 1073 PetscReal afill; 1074 PetscInt i,j,ndouble = 0; 1075 PetscSegBuffer seg,segrow; 1076 char *seen; 1077 1078 PetscFunctionBegin; 1079 ierr = PetscMalloc1(am+1,&ci);CHKERRQ(ierr); 1080 ci[0] = 0; 1081 1082 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 1083 ierr = PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);CHKERRQ(ierr); 1084 ierr = PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);CHKERRQ(ierr); 1085 ierr = PetscCalloc1(bn,&seen);CHKERRQ(ierr); 1086 1087 /* Determine ci and cj */ 1088 for (i=0; i<am; i++) { 1089 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 1090 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 1091 PetscInt packlen = 0,*PETSC_RESTRICT crow; 1092 /* Pack segrow */ 1093 for (j=0; j<anzi; j++) { 1094 PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k; 1095 for (k=bjstart; k<bjend; k++) { 1096 PetscInt bcol = bj[k]; 1097 if (!seen[bcol]) { /* new entry */ 1098 PetscInt *PETSC_RESTRICT slot; 1099 ierr = PetscSegBufferGetInts(segrow,1,&slot);CHKERRQ(ierr); 1100 *slot = bcol; 1101 seen[bcol] = 1; 1102 packlen++; 1103 } 1104 } 1105 } 1106 ierr = PetscSegBufferGetInts(seg,packlen,&crow);CHKERRQ(ierr); 1107 ierr = PetscSegBufferExtractTo(segrow,crow);CHKERRQ(ierr); 1108 ierr = PetscSortInt(packlen,crow);CHKERRQ(ierr); 1109 ci[i+1] = ci[i] + packlen; 1110 for (j=0; j<packlen; j++) seen[crow[j]] = 0; 1111 } 1112 ierr = PetscSegBufferDestroy(&segrow);CHKERRQ(ierr); 1113 ierr = PetscFree(seen);CHKERRQ(ierr); 1114 1115 /* Column indices are in the segmented buffer */ 1116 ierr = PetscSegBufferExtractAlloc(seg,&cj);CHKERRQ(ierr); 1117 ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr); 1118 1119 /* put together the new symbolic matrix */ 1120 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 1121 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 1122 1123 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1124 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 1125 c = (Mat_SeqAIJ*)(C->data); 1126 c->free_a = PETSC_TRUE; 1127 c->free_ij = PETSC_TRUE; 1128 c->nonew = 0; 1129 1130 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1131 1132 /* set MatInfo */ 1133 afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5; 1134 if (afill < 1.0) afill = 1.0; 1135 c->maxnz = ci[am]; 1136 c->nz = ci[am]; 1137 C->info.mallocs = ndouble; 1138 C->info.fill_ratio_given = fill; 1139 C->info.fill_ratio_needed = afill; 1140 1141 #if defined(PETSC_USE_INFO) 1142 if (ci[am]) { 1143 ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 1144 ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 1145 } else { 1146 ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 1147 } 1148 #endif 1149 PetscFunctionReturn(0); 1150 } 1151 1152 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data) 1153 { 1154 PetscErrorCode ierr; 1155 Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data; 1156 1157 PetscFunctionBegin; 1158 ierr = MatTransposeColoringDestroy(&abt->matcoloring);CHKERRQ(ierr); 1159 ierr = MatDestroy(&abt->Bt_den);CHKERRQ(ierr); 1160 ierr = MatDestroy(&abt->ABt_den);CHKERRQ(ierr); 1161 ierr = PetscFree(abt);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1166 { 1167 PetscErrorCode ierr; 1168 Mat Bt; 1169 PetscInt *bti,*btj; 1170 Mat_MatMatTransMult *abt; 1171 Mat_Product *product = C->product; 1172 char *alg; 1173 1174 PetscFunctionBegin; 1175 if (!product) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1176 if (product->data) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 1177 1178 /* create symbolic Bt */ 1179 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 1180 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);CHKERRQ(ierr); 1181 ierr = MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr); 1182 ierr = MatSetType(Bt,((PetscObject)A)->type_name);CHKERRQ(ierr); 1183 1184 /* get symbolic C=A*Bt */ 1185 ierr = PetscStrallocpy(product->alg,&alg);CHKERRQ(ierr); 1186 ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); /* set algorithm for C = A*Bt */ 1187 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 1188 ierr = MatProductSetAlgorithm(C,alg);CHKERRQ(ierr); /* resume original algorithm for ABt product */ 1189 ierr = PetscFree(alg);CHKERRQ(ierr); 1190 1191 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 1192 ierr = PetscNew(&abt);CHKERRQ(ierr); 1193 1194 product->data = abt; 1195 product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 1196 1197 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 1198 1199 abt->usecoloring = PETSC_FALSE; 1200 ierr = PetscStrcmp(product->alg,"color",&abt->usecoloring);CHKERRQ(ierr); 1201 if (abt->usecoloring) { 1202 /* Create MatTransposeColoring from symbolic C=A*B^T */ 1203 MatTransposeColoring matcoloring; 1204 MatColoring coloring; 1205 ISColoring iscoloring; 1206 Mat Bt_dense,C_dense; 1207 1208 /* inode causes memory problem */ 1209 ierr = MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 1210 1211 ierr = MatColoringCreate(C,&coloring);CHKERRQ(ierr); 1212 ierr = MatColoringSetDistance(coloring,2);CHKERRQ(ierr); 1213 ierr = MatColoringSetType(coloring,MATCOLORINGSL);CHKERRQ(ierr); 1214 ierr = MatColoringSetFromOptions(coloring);CHKERRQ(ierr); 1215 ierr = MatColoringApply(coloring,&iscoloring);CHKERRQ(ierr); 1216 ierr = MatColoringDestroy(&coloring);CHKERRQ(ierr); 1217 ierr = MatTransposeColoringCreate(C,iscoloring,&matcoloring);CHKERRQ(ierr); 1218 1219 abt->matcoloring = matcoloring; 1220 1221 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 1222 1223 /* Create Bt_dense and C_dense = A*Bt_dense */ 1224 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 1225 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 1226 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 1227 ierr = MatSeqDenseSetPreallocation(Bt_dense,NULL);CHKERRQ(ierr); 1228 1229 Bt_dense->assembled = PETSC_TRUE; 1230 abt->Bt_den = Bt_dense; 1231 1232 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 1233 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 1234 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 1235 ierr = MatSeqDenseSetPreallocation(C_dense,NULL);CHKERRQ(ierr); 1236 1237 Bt_dense->assembled = PETSC_TRUE; 1238 abt->ABt_den = C_dense; 1239 1240 #if defined(PETSC_USE_INFO) 1241 { 1242 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 1243 ierr = PetscInfo7(C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));CHKERRQ(ierr); 1244 } 1245 #endif 1246 } 1247 /* clean up */ 1248 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 1249 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1254 { 1255 PetscErrorCode ierr; 1256 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1257 PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 1258 PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 1259 PetscLogDouble flops=0.0; 1260 MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval; 1261 Mat_MatMatTransMult *abt; 1262 Mat_Product *product = C->product; 1263 1264 PetscFunctionBegin; 1265 if (!product) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1266 abt = (Mat_MatMatTransMult *)product->data; 1267 if (!abt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1268 /* clear old values in C */ 1269 if (!c->a) { 1270 ierr = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 1271 c->a = ca; 1272 c->free_a = PETSC_TRUE; 1273 } else { 1274 ca = c->a; 1275 ierr = PetscArrayzero(ca,ci[cm]+1);CHKERRQ(ierr); 1276 } 1277 1278 if (abt->usecoloring) { 1279 MatTransposeColoring matcoloring = abt->matcoloring; 1280 Mat Bt_dense,C_dense = abt->ABt_den; 1281 1282 /* Get Bt_dense by Apply MatTransposeColoring to B */ 1283 Bt_dense = abt->Bt_den; 1284 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 1285 1286 /* C_dense = A*Bt_dense */ 1287 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 1288 1289 /* Recover C from C_dense */ 1290 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 for (i=0; i<cm; i++) { 1295 anzi = ai[i+1] - ai[i]; 1296 acol = aj + ai[i]; 1297 aval = aa + ai[i]; 1298 cnzi = ci[i+1] - ci[i]; 1299 ccol = cj + ci[i]; 1300 cval = ca + ci[i]; 1301 for (j=0; j<cnzi; j++) { 1302 brow = ccol[j]; 1303 bnzj = bi[brow+1] - bi[brow]; 1304 bcol = bj + bi[brow]; 1305 bval = ba + bi[brow]; 1306 1307 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 1308 nexta = 0; nextb = 0; 1309 while (nexta<anzi && nextb<bnzj) { 1310 while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; 1311 if (nexta == anzi) break; 1312 while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; 1313 if (nextb == bnzj) break; 1314 if (acol[nexta] == bcol[nextb]) { 1315 cval[j] += aval[nexta]*bval[nextb]; 1316 nexta++; nextb++; 1317 flops += 2; 1318 } 1319 } 1320 } 1321 } 1322 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1323 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1324 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data) 1329 { 1330 PetscErrorCode ierr; 1331 Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data; 1332 1333 PetscFunctionBegin; 1334 ierr = MatDestroy(&atb->At);CHKERRQ(ierr); 1335 if (atb->destroy) { 1336 ierr = (*atb->destroy)(atb->data);CHKERRQ(ierr); 1337 } 1338 ierr = PetscFree(atb);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1343 { 1344 PetscErrorCode ierr; 1345 Mat At = NULL; 1346 PetscInt *ati,*atj; 1347 Mat_Product *product = C->product; 1348 PetscBool flg,def,square; 1349 1350 PetscFunctionBegin; 1351 MatCheckProduct(C,4); 1352 square = (PetscBool)(A == B && A->symmetric && A->symmetric_set); 1353 /* outerproduct */ 1354 ierr = PetscStrcmp(product->alg,"outerproduct",&flg);CHKERRQ(ierr); 1355 if (flg) { 1356 /* create symbolic At */ 1357 if (!square) { 1358 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1359 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);CHKERRQ(ierr); 1360 ierr = MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr); 1361 ierr = MatSetType(At,((PetscObject)A)->type_name);CHKERRQ(ierr); 1362 } 1363 /* get symbolic C=At*B */ 1364 ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); 1365 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C);CHKERRQ(ierr); 1366 1367 /* clean up */ 1368 if (!square) { 1369 ierr = MatDestroy(&At);CHKERRQ(ierr); 1370 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1371 } 1372 1373 C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */ 1374 ierr = MatProductSetAlgorithm(C,"outerproduct");CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 /* matmatmult */ 1379 ierr = PetscStrcmp(product->alg,"default",&def);CHKERRQ(ierr); 1380 ierr = PetscStrcmp(product->alg,"at*b",&flg);CHKERRQ(ierr); 1381 if (flg || def) { 1382 Mat_MatTransMatMult *atb; 1383 1384 if (product->data) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 1385 ierr = PetscNew(&atb);CHKERRQ(ierr); 1386 if (!square) { 1387 ierr = MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 1388 } 1389 ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); 1390 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C);CHKERRQ(ierr); 1391 ierr = MatProductSetAlgorithm(C,"at*b");CHKERRQ(ierr); 1392 product->data = atb; 1393 product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 1394 atb->At = At; 1395 atb->updateAt = PETSC_FALSE; /* because At is computed here */ 1396 1397 C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */ 1398 PetscFunctionReturn(0); 1399 } 1400 1401 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1402 } 1403 1404 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1405 { 1406 PetscErrorCode ierr; 1407 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1408 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1409 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1410 PetscLogDouble flops=0.0; 1411 MatScalar *aa=a->a,*ba,*ca,*caj; 1412 1413 PetscFunctionBegin; 1414 if (!c->a) { 1415 ierr = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 1416 1417 c->a = ca; 1418 c->free_a = PETSC_TRUE; 1419 } else { 1420 ca = c->a; 1421 ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 1422 } 1423 1424 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1425 for (i=0; i<am; i++) { 1426 bj = b->j + bi[i]; 1427 ba = b->a + bi[i]; 1428 bnzi = bi[i+1] - bi[i]; 1429 anzi = ai[i+1] - ai[i]; 1430 for (j=0; j<anzi; j++) { 1431 nextb = 0; 1432 crow = *aj++; 1433 cjj = cj + ci[crow]; 1434 caj = ca + ci[crow]; 1435 /* perform sparse axpy operation. Note cjj includes bj. */ 1436 for (k=0; nextb<bnzi; k++) { 1437 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 1438 caj[k] += (*aa)*(*(ba+nextb)); 1439 nextb++; 1440 } 1441 } 1442 flops += 2*bnzi; 1443 aa++; 1444 } 1445 } 1446 1447 /* Assemble the final matrix and clean up */ 1448 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1449 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C) 1455 { 1456 PetscErrorCode ierr; 1457 1458 PetscFunctionBegin; 1459 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 1460 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 1461 PetscFunctionReturn(0); 1462 } 1463 1464 PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add) 1465 { 1466 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1467 Mat_SeqDense *bd=(Mat_SeqDense*)B->data; 1468 Mat_SeqDense *cd=(Mat_SeqDense*)C->data; 1469 PetscErrorCode ierr; 1470 PetscScalar *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4; 1471 const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av; 1472 const PetscInt *aj; 1473 PetscInt cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n; 1474 PetscInt clda=cd->lda; 1475 PetscInt am4=4*clda,bm4=4*bm,col,i,j,n; 1476 1477 PetscFunctionBegin; 1478 if (!cm || !cn) PetscFunctionReturn(0); 1479 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 1480 if (add) { 1481 ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr); 1482 } else { 1483 ierr = MatDenseGetArrayWrite(C,&c);CHKERRQ(ierr); 1484 } 1485 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1486 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1487 c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda; 1488 for (col=0; col<(cn/4)*4; col += 4) { /* over columns of C */ 1489 for (i=0; i<am; i++) { /* over rows of A in those columns */ 1490 r1 = r2 = r3 = r4 = 0.0; 1491 n = a->i[i+1] - a->i[i]; 1492 aj = a->j + a->i[i]; 1493 aa = av + a->i[i]; 1494 for (j=0; j<n; j++) { 1495 const PetscScalar aatmp = aa[j]; 1496 const PetscInt ajtmp = aj[j]; 1497 r1 += aatmp*b1[ajtmp]; 1498 r2 += aatmp*b2[ajtmp]; 1499 r3 += aatmp*b3[ajtmp]; 1500 r4 += aatmp*b4[ajtmp]; 1501 } 1502 if (add) { 1503 c1[i] += r1; 1504 c2[i] += r2; 1505 c3[i] += r3; 1506 c4[i] += r4; 1507 } else { 1508 c1[i] = r1; 1509 c2[i] = r2; 1510 c3[i] = r3; 1511 c4[i] = r4; 1512 } 1513 } 1514 b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; 1515 c1 += am4; c2 += am4; c3 += am4; c4 += am4; 1516 } 1517 /* process remaining columns */ 1518 if (col != cn) { 1519 PetscInt rc = cn-col; 1520 1521 if (rc == 1) { 1522 for (i=0; i<am; i++) { 1523 r1 = 0.0; 1524 n = a->i[i+1] - a->i[i]; 1525 aj = a->j + a->i[i]; 1526 aa = av + a->i[i]; 1527 for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]]; 1528 if (add) c1[i] += r1; 1529 else c1[i] = r1; 1530 } 1531 } else if (rc == 2) { 1532 for (i=0; i<am; i++) { 1533 r1 = r2 = 0.0; 1534 n = a->i[i+1] - a->i[i]; 1535 aj = a->j + a->i[i]; 1536 aa = av + a->i[i]; 1537 for (j=0; j<n; j++) { 1538 const PetscScalar aatmp = aa[j]; 1539 const PetscInt ajtmp = aj[j]; 1540 r1 += aatmp*b1[ajtmp]; 1541 r2 += aatmp*b2[ajtmp]; 1542 } 1543 if (add) { 1544 c1[i] += r1; 1545 c2[i] += r2; 1546 } else { 1547 c1[i] = r1; 1548 c2[i] = r2; 1549 } 1550 } 1551 } else { 1552 for (i=0; i<am; i++) { 1553 r1 = r2 = r3 = 0.0; 1554 n = a->i[i+1] - a->i[i]; 1555 aj = a->j + a->i[i]; 1556 aa = av + a->i[i]; 1557 for (j=0; j<n; j++) { 1558 const PetscScalar aatmp = aa[j]; 1559 const PetscInt ajtmp = aj[j]; 1560 r1 += aatmp*b1[ajtmp]; 1561 r2 += aatmp*b2[ajtmp]; 1562 r3 += aatmp*b3[ajtmp]; 1563 } 1564 if (add) { 1565 c1[i] += r1; 1566 c2[i] += r2; 1567 c3[i] += r3; 1568 } else { 1569 c1[i] = r1; 1570 c2[i] = r2; 1571 c3[i] = r3; 1572 } 1573 } 1574 } 1575 } 1576 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1577 if (add) { 1578 ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr); 1579 } else { 1580 ierr = MatDenseRestoreArrayWrite(C,&c);CHKERRQ(ierr); 1581 } 1582 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1583 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 1587 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1588 { 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n); 1593 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1594 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1595 1596 ierr = MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 /* ------------------------------------------------------- */ 1601 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C) 1602 { 1603 PetscFunctionBegin; 1604 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense; 1605 C->ops->productsymbolic = MatProductSymbolic_AB; 1606 PetscFunctionReturn(0); 1607 } 1608 1609 PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat); 1610 1611 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C) 1612 { 1613 PetscFunctionBegin; 1614 C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 1615 C->ops->productsymbolic = MatProductSymbolic_AtB; 1616 PetscFunctionReturn(0); 1617 } 1618 1619 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C) 1620 { 1621 PetscFunctionBegin; 1622 C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 1623 C->ops->productsymbolic = MatProductSymbolic_ABt; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C) 1628 { 1629 PetscErrorCode ierr; 1630 Mat_Product *product = C->product; 1631 1632 PetscFunctionBegin; 1633 switch (product->type) { 1634 case MATPRODUCT_AB: 1635 ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);CHKERRQ(ierr); 1636 break; 1637 case MATPRODUCT_AtB: 1638 ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);CHKERRQ(ierr); 1639 break; 1640 case MATPRODUCT_ABt: 1641 ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C);CHKERRQ(ierr); 1642 break; 1643 default: 1644 break; 1645 } 1646 PetscFunctionReturn(0); 1647 } 1648 /* ------------------------------------------------------- */ 1649 static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C) 1650 { 1651 PetscErrorCode ierr; 1652 Mat_Product *product = C->product; 1653 Mat A = product->A; 1654 PetscBool baij; 1655 1656 PetscFunctionBegin; 1657 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij);CHKERRQ(ierr); 1658 if (!baij) { /* A is seqsbaij */ 1659 PetscBool sbaij; 1660 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 1661 if (!sbaij) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format"); 1662 1663 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense; 1664 } else { /* A is seqbaij */ 1665 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense; 1666 } 1667 1668 C->ops->productsymbolic = MatProductSymbolic_AB; 1669 PetscFunctionReturn(0); 1670 } 1671 1672 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C) 1673 { 1674 PetscErrorCode ierr; 1675 Mat_Product *product = C->product; 1676 1677 PetscFunctionBegin; 1678 MatCheckProduct(C,1); 1679 if (!product->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A"); 1680 if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric)) { 1681 ierr = MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);CHKERRQ(ierr); 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 /* ------------------------------------------------------- */ 1687 static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C) 1688 { 1689 PetscFunctionBegin; 1690 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ; 1691 C->ops->productsymbolic = MatProductSymbolic_AB; 1692 PetscFunctionReturn(0); 1693 } 1694 1695 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C) 1696 { 1697 PetscErrorCode ierr; 1698 Mat_Product *product = C->product; 1699 1700 PetscFunctionBegin; 1701 if (product->type == MATPRODUCT_AB) { 1702 ierr = MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);CHKERRQ(ierr); 1703 } 1704 PetscFunctionReturn(0); 1705 } 1706 /* ------------------------------------------------------- */ 1707 1708 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1709 { 1710 PetscErrorCode ierr; 1711 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1712 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1713 PetscInt *bi = b->i,*bj=b->j; 1714 PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1715 MatScalar *btval,*btval_den,*ba=b->a; 1716 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1717 1718 PetscFunctionBegin; 1719 btval_den=btdense->v; 1720 ierr = PetscArrayzero(btval_den,m*n);CHKERRQ(ierr); 1721 for (k=0; k<ncolors; k++) { 1722 ncolumns = coloring->ncolumns[k]; 1723 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1724 col = *(columns + colorforcol[k] + l); 1725 btcol = bj + bi[col]; 1726 btval = ba + bi[col]; 1727 anz = bi[col+1] - bi[col]; 1728 for (j=0; j<anz; j++) { 1729 brow = btcol[j]; 1730 btval_den[brow] = btval[j]; 1731 } 1732 } 1733 btval_den += m; 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 1738 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1739 { 1740 PetscErrorCode ierr; 1741 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1742 const PetscScalar *ca_den,*ca_den_ptr; 1743 PetscScalar *ca=csp->a; 1744 PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors; 1745 PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp; 1746 PetscInt nrows,*row,*idx; 1747 PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow; 1748 1749 PetscFunctionBegin; 1750 ierr = MatDenseGetArrayRead(Cden,&ca_den);CHKERRQ(ierr); 1751 1752 if (brows > 0) { 1753 PetscInt *lstart,row_end,row_start; 1754 lstart = matcoloring->lstart; 1755 ierr = PetscArrayzero(lstart,ncolors);CHKERRQ(ierr); 1756 1757 row_end = brows; 1758 if (row_end > m) row_end = m; 1759 for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */ 1760 ca_den_ptr = ca_den; 1761 for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */ 1762 nrows = matcoloring->nrows[k]; 1763 row = rows + colorforrow[k]; 1764 idx = den2sp + colorforrow[k]; 1765 for (l=lstart[k]; l<nrows; l++) { 1766 if (row[l] >= row_end) { 1767 lstart[k] = l; 1768 break; 1769 } else { 1770 ca[idx[l]] = ca_den_ptr[row[l]]; 1771 } 1772 } 1773 ca_den_ptr += m; 1774 } 1775 row_end += brows; 1776 if (row_end > m) row_end = m; 1777 } 1778 } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ 1779 ca_den_ptr = ca_den; 1780 for (k=0; k<ncolors; k++) { 1781 nrows = matcoloring->nrows[k]; 1782 row = rows + colorforrow[k]; 1783 idx = den2sp + colorforrow[k]; 1784 for (l=0; l<nrows; l++) { 1785 ca[idx[l]] = ca_den_ptr[row[l]]; 1786 } 1787 ca_den_ptr += m; 1788 } 1789 } 1790 1791 ierr = MatDenseRestoreArrayRead(Cden,&ca_den);CHKERRQ(ierr); 1792 #if defined(PETSC_USE_INFO) 1793 if (matcoloring->brows > 0) { 1794 ierr = PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);CHKERRQ(ierr); 1795 } else { 1796 ierr = PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");CHKERRQ(ierr); 1797 } 1798 #endif 1799 PetscFunctionReturn(0); 1800 } 1801 1802 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1803 { 1804 PetscErrorCode ierr; 1805 PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm; 1806 const PetscInt *is,*ci,*cj,*row_idx; 1807 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1808 IS *isa; 1809 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1810 PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i; 1811 PetscInt *colorforcol,*columns,*columns_i,brows; 1812 PetscBool flg; 1813 1814 PetscFunctionBegin; 1815 ierr = ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1816 1817 /* bs >1 is not being tested yet! */ 1818 Nbs = mat->cmap->N/bs; 1819 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1820 c->N = Nbs; 1821 c->m = c->M; 1822 c->rstart = 0; 1823 c->brows = 100; 1824 1825 c->ncolors = nis; 1826 ierr = PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow);CHKERRQ(ierr); 1827 ierr = PetscMalloc1(csp->nz+1,&rows);CHKERRQ(ierr); 1828 ierr = PetscMalloc1(csp->nz+1,&den2sp);CHKERRQ(ierr); 1829 1830 brows = c->brows; 1831 ierr = PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg);CHKERRQ(ierr); 1832 if (flg) c->brows = brows; 1833 if (brows > 0) { 1834 ierr = PetscMalloc1(nis+1,&c->lstart);CHKERRQ(ierr); 1835 } 1836 1837 colorforrow[0] = 0; 1838 rows_i = rows; 1839 den2sp_i = den2sp; 1840 1841 ierr = PetscMalloc1(nis+1,&colorforcol);CHKERRQ(ierr); 1842 ierr = PetscMalloc1(Nbs+1,&columns);CHKERRQ(ierr); 1843 1844 colorforcol[0] = 0; 1845 columns_i = columns; 1846 1847 /* get column-wise storage of mat */ 1848 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 1849 1850 cm = c->m; 1851 ierr = PetscMalloc1(cm+1,&rowhit);CHKERRQ(ierr); 1852 ierr = PetscMalloc1(cm+1,&idxhit);CHKERRQ(ierr); 1853 for (i=0; i<nis; i++) { /* loop over color */ 1854 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1855 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1856 1857 c->ncolumns[i] = n; 1858 if (n) { 1859 ierr = PetscArraycpy(columns_i,is,n);CHKERRQ(ierr); 1860 } 1861 colorforcol[i+1] = colorforcol[i] + n; 1862 columns_i += n; 1863 1864 /* fast, crude version requires O(N*N) work */ 1865 ierr = PetscArrayzero(rowhit,cm);CHKERRQ(ierr); 1866 1867 for (j=0; j<n; j++) { /* loop over columns*/ 1868 col = is[j]; 1869 row_idx = cj + ci[col]; 1870 m = ci[col+1] - ci[col]; 1871 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 1872 idxhit[*row_idx] = spidx[ci[col] + k]; 1873 rowhit[*row_idx++] = col + 1; 1874 } 1875 } 1876 /* count the number of hits */ 1877 nrows = 0; 1878 for (j=0; j<cm; j++) { 1879 if (rowhit[j]) nrows++; 1880 } 1881 c->nrows[i] = nrows; 1882 colorforrow[i+1] = colorforrow[i] + nrows; 1883 1884 nrows = 0; 1885 for (j=0; j<cm; j++) { /* loop over rows */ 1886 if (rowhit[j]) { 1887 rows_i[nrows] = j; 1888 den2sp_i[nrows] = idxhit[j]; 1889 nrows++; 1890 } 1891 } 1892 den2sp_i += nrows; 1893 1894 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1895 rows_i += nrows; 1896 } 1897 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 1898 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1899 ierr = ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa);CHKERRQ(ierr); 1900 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1901 1902 c->colorforrow = colorforrow; 1903 c->rows = rows; 1904 c->den2sp = den2sp; 1905 c->colorforcol = colorforcol; 1906 c->columns = columns; 1907 1908 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 /* --------------------------------------------------------------- */ 1913 static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C) 1914 { 1915 PetscErrorCode ierr; 1916 Mat_Product *product = C->product; 1917 Mat A=product->A,B=product->B; 1918 1919 PetscFunctionBegin; 1920 if (C->ops->mattransposemultnumeric) { 1921 /* Alg: "outerproduct" */ 1922 ierr = (*C->ops->mattransposemultnumeric)(A,B,C);CHKERRQ(ierr); 1923 } else { 1924 /* Alg: "matmatmult" -- C = At*B */ 1925 Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data; 1926 Mat At; 1927 1928 if (!atb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1929 At = atb->At; 1930 if (atb->updateAt && At) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */ 1931 ierr = MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr); 1932 } 1933 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(At ? At : A,B,C);CHKERRQ(ierr); 1934 atb->updateAt = PETSC_TRUE; 1935 } 1936 PetscFunctionReturn(0); 1937 } 1938 1939 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C) 1940 { 1941 PetscErrorCode ierr; 1942 Mat_Product *product = C->product; 1943 Mat A=product->A,B=product->B; 1944 PetscReal fill=product->fill; 1945 1946 PetscFunctionBegin; 1947 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 1948 1949 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ; 1950 PetscFunctionReturn(0); 1951 } 1952 1953 /* --------------------------------------------------------------- */ 1954 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C) 1955 { 1956 PetscErrorCode ierr; 1957 Mat_Product *product = C->product; 1958 PetscInt alg = 0; /* default algorithm */ 1959 PetscBool flg = PETSC_FALSE; 1960 #if !defined(PETSC_HAVE_HYPRE) 1961 const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 1962 PetscInt nalg = 7; 1963 #else 1964 const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"}; 1965 PetscInt nalg = 8; 1966 #endif 1967 1968 PetscFunctionBegin; 1969 /* Set default algorithm */ 1970 ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 1971 if (flg) { 1972 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 1973 } 1974 1975 /* Get runtime option */ 1976 if (product->api_user) { 1977 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 1978 ierr = PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 1979 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1980 } else { 1981 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 1982 ierr = PetscOptionsEList("-matproduct_ab_via","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 1983 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1984 } 1985 if (flg) { 1986 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 1987 } 1988 1989 C->ops->productsymbolic = MatProductSymbolic_AB; 1990 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 1991 PetscFunctionReturn(0); 1992 } 1993 1994 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C) 1995 { 1996 PetscErrorCode ierr; 1997 Mat_Product *product = C->product; 1998 PetscInt alg = 0; /* default algorithm */ 1999 PetscBool flg = PETSC_FALSE; 2000 const char *algTypes[3] = {"default","at*b","outerproduct"}; 2001 PetscInt nalg = 3; 2002 2003 PetscFunctionBegin; 2004 /* Get runtime option */ 2005 if (product->api_user) { 2006 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 2007 ierr = PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2008 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2009 } else { 2010 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 2011 ierr = PetscOptionsEList("-matproduct_atb_via","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2012 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2013 } 2014 if (flg) { 2015 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2016 } 2017 2018 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ; 2019 PetscFunctionReturn(0); 2020 } 2021 2022 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C) 2023 { 2024 PetscErrorCode ierr; 2025 Mat_Product *product = C->product; 2026 PetscInt alg = 0; /* default algorithm */ 2027 PetscBool flg = PETSC_FALSE; 2028 const char *algTypes[2] = {"default","color"}; 2029 PetscInt nalg = 2; 2030 2031 PetscFunctionBegin; 2032 /* Set default algorithm */ 2033 ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 2034 if (!flg) { 2035 alg = 1; 2036 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2037 } 2038 2039 /* Get runtime option */ 2040 if (product->api_user) { 2041 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2042 ierr = PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2043 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2044 } else { 2045 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2046 ierr = PetscOptionsEList("-matproduct_abt_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2047 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2048 } 2049 if (flg) { 2050 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2051 } 2052 2053 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 2054 C->ops->productsymbolic = MatProductSymbolic_ABt; 2055 PetscFunctionReturn(0); 2056 } 2057 2058 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C) 2059 { 2060 PetscErrorCode ierr; 2061 Mat_Product *product = C->product; 2062 PetscBool flg = PETSC_FALSE; 2063 PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */ 2064 #if !defined(PETSC_HAVE_HYPRE) 2065 const char *algTypes[2] = {"scalable","rap"}; 2066 PetscInt nalg = 2; 2067 #else 2068 const char *algTypes[3] = {"scalable","rap","hypre"}; 2069 PetscInt nalg = 3; 2070 #endif 2071 2072 PetscFunctionBegin; 2073 /* Set default algorithm */ 2074 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2075 if (flg) { 2076 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2077 } 2078 2079 /* Get runtime option */ 2080 if (product->api_user) { 2081 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 2082 ierr = PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 2083 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2084 } else { 2085 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 2086 ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 2087 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2088 } 2089 if (flg) { 2090 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2091 } 2092 2093 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ; 2094 PetscFunctionReturn(0); 2095 } 2096 2097 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C) 2098 { 2099 PetscErrorCode ierr; 2100 Mat_Product *product = C->product; 2101 PetscBool flg = PETSC_FALSE; 2102 PetscInt alg = 0; /* default algorithm */ 2103 const char *algTypes[3] = {"r*a*rt","r*art","coloring_rart"}; 2104 PetscInt nalg = 3; 2105 2106 PetscFunctionBegin; 2107 /* Set default algorithm */ 2108 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2109 if (flg) { 2110 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2111 } 2112 2113 /* Get runtime option */ 2114 if (product->api_user) { 2115 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");CHKERRQ(ierr); 2116 ierr = PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 2117 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2118 } else { 2119 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");CHKERRQ(ierr); 2120 ierr = PetscOptionsEList("-matproduct_rart_via","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 2121 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2122 } 2123 if (flg) { 2124 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2125 } 2126 2127 C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ; 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */ 2132 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C) 2133 { 2134 PetscErrorCode ierr; 2135 Mat_Product *product = C->product; 2136 PetscInt alg = 0; /* default algorithm */ 2137 PetscBool flg = PETSC_FALSE; 2138 const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 2139 PetscInt nalg = 7; 2140 2141 PetscFunctionBegin; 2142 /* Set default algorithm */ 2143 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2144 if (flg) { 2145 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2146 } 2147 2148 /* Get runtime option */ 2149 if (product->api_user) { 2150 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");CHKERRQ(ierr); 2151 ierr = PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2152 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2153 } else { 2154 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");CHKERRQ(ierr); 2155 ierr = PetscOptionsEList("-matproduct_abc_via","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2156 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2157 } 2158 if (flg) { 2159 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2160 } 2161 2162 C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ; 2163 C->ops->productsymbolic = MatProductSymbolic_ABC; 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C) 2168 { 2169 PetscErrorCode ierr; 2170 Mat_Product *product = C->product; 2171 2172 PetscFunctionBegin; 2173 switch (product->type) { 2174 case MATPRODUCT_AB: 2175 ierr = MatProductSetFromOptions_SeqAIJ_AB(C);CHKERRQ(ierr); 2176 break; 2177 case MATPRODUCT_AtB: 2178 ierr = MatProductSetFromOptions_SeqAIJ_AtB(C);CHKERRQ(ierr); 2179 break; 2180 case MATPRODUCT_ABt: 2181 ierr = MatProductSetFromOptions_SeqAIJ_ABt(C);CHKERRQ(ierr); 2182 break; 2183 case MATPRODUCT_PtAP: 2184 ierr = MatProductSetFromOptions_SeqAIJ_PtAP(C);CHKERRQ(ierr); 2185 break; 2186 case MATPRODUCT_RARt: 2187 ierr = MatProductSetFromOptions_SeqAIJ_RARt(C);CHKERRQ(ierr); 2188 break; 2189 case MATPRODUCT_ABC: 2190 ierr = MatProductSetFromOptions_SeqAIJ_ABC(C);CHKERRQ(ierr); 2191 break; 2192 default: 2193 break; 2194 } 2195 PetscFunctionReturn(0); 2196 } 2197