1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <../src/mat/utils/petscheap.h> 10 #include <petscbt.h> 11 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 15 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 16 { 17 PetscErrorCode ierr; 18 PetscBool scalable=PETSC_FALSE,scalable_fast=PETSC_FALSE,heap = PETSC_FALSE,btheap = PETSC_FALSE; 19 20 PetscFunctionBegin; 21 if (scall == MAT_INITIAL_MATRIX){ 22 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 23 ierr = PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,PETSC_NULL);CHKERRQ(ierr); 24 ierr = PetscOptionsBool("-matmatmult_scalable_fast","Use a scalable but slower C=A*B","",scalable_fast,&scalable_fast,PETSC_NULL);CHKERRQ(ierr); 25 ierr = PetscOptionsBool("-matmatmult_heap","Use heap implementation of symbolic factorization C=A*B","",heap,&heap,PETSC_NULL);CHKERRQ(ierr); 26 ierr = PetscOptionsBool("-matmatmult_btheap","Use btheap implementation of symbolic factorization C=A*B","",btheap,&btheap,PETSC_NULL);CHKERRQ(ierr); 27 ierr = PetscOptionsEnd();CHKERRQ(ierr); 28 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 29 if (scalable_fast){ 30 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);CHKERRQ(ierr); 31 } else if (scalable){ 32 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 33 } else if (heap) { 34 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr); 35 } else if (btheap) { 36 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr); 37 } else { 38 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 39 } 40 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 41 } 42 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 43 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 44 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 50 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 51 { 52 PetscErrorCode ierr; 53 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 54 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 55 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 56 PetscReal afill; 57 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0; 58 PetscBT lnkbt; 59 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 60 61 PetscFunctionBegin; 62 /* Get ci and cj */ 63 /*---------------*/ 64 /* Allocate ci array, arrays for fill computation and */ 65 /* free space for accumulating nonzero column info */ 66 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 67 ci[0] = 0; 68 69 /* create and initialize a linked list */ 70 nlnk_max = a->rmax*b->rmax; 71 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; 72 ierr = PetscLLCondensedCreate(nlnk_max,bn,&lnk,&lnkbt);CHKERRQ(ierr); 73 74 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 75 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 76 current_space = free_space; 77 78 /* Determine ci and cj */ 79 for (i=0; i<am; i++) { 80 anzi = ai[i+1] - ai[i]; 81 aj = a->j + ai[i]; 82 for (j=0; j<anzi; j++){ 83 brow = aj[j]; 84 bnzj = bi[brow+1] - bi[brow]; 85 bj = b->j + bi[brow]; 86 /* add non-zero cols of B into the sorted linked list lnk */ 87 ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr); 88 } 89 cnzi = lnk[0]; 90 91 /* If free space is not available, make more free space */ 92 /* Double the amount of total space in the list */ 93 if (current_space->local_remaining<cnzi) { 94 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 95 ndouble++; 96 } 97 98 /* Copy data into free space, then initialize lnk */ 99 ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr); 100 current_space->array += cnzi; 101 current_space->local_used += cnzi; 102 current_space->local_remaining -= cnzi; 103 ci[i+1] = ci[i] + cnzi; 104 } 105 106 /* Column indices are in the list of free space */ 107 /* Allocate space for cj, initialize cj, and */ 108 /* destroy list of free space and other temporary array(s) */ 109 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 110 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 111 ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr); 112 113 /* put together the new symbolic matrix */ 114 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,PETSC_NULL,C);CHKERRQ(ierr); 115 (*C)->rmap->bs = A->rmap->bs; 116 (*C)->cmap->bs = B->cmap->bs; 117 118 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 119 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 120 c = (Mat_SeqAIJ *)((*C)->data); 121 c->free_a = PETSC_FALSE; 122 c->free_ij = PETSC_TRUE; 123 c->nonew = 0; 124 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */ 125 126 /* set MatInfo */ 127 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 128 if (afill < 1.0) afill = 1.0; 129 c->maxnz = ci[am]; 130 c->nz = ci[am]; 131 (*C)->info.mallocs = ndouble; 132 (*C)->info.fill_ratio_given = fill; 133 (*C)->info.fill_ratio_needed = afill; 134 135 #if defined(PETSC_USE_INFO) 136 if (ci[am]) { 137 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 138 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 139 } else { 140 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 141 } 142 #endif 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 148 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 149 { 150 PetscErrorCode ierr; 151 PetscLogDouble flops=0.0; 152 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 153 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 154 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 155 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 156 PetscInt am=A->rmap->n,cm=C->rmap->n; 157 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 158 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp; 159 PetscScalar *ab_dense; 160 161 PetscFunctionBegin; 162 /* printf("MatMatMultNumeric_SeqAIJ_SeqAIJ...ca %p\n",c->a); */ 163 if (!c->a){ /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 164 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 165 c->a = ca; 166 c->free_a = PETSC_TRUE; 167 168 ierr = PetscMalloc(B->cmap->N*sizeof(PetscScalar),&ab_dense);CHKERRQ(ierr); 169 ierr = PetscMemzero(ab_dense,B->cmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 170 c->matmult_abdense = ab_dense; 171 } else { 172 ca = c->a; 173 ab_dense = c->matmult_abdense; 174 } 175 176 /* clean old values in C */ 177 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 178 /* Traverse A row-wise. */ 179 /* Build the ith row in C by summing over nonzero columns in A, */ 180 /* the rows of B corresponding to nonzeros of A. */ 181 for (i=0; i<am; i++) { 182 anzi = ai[i+1] - ai[i]; 183 for (j=0; j<anzi; j++) { 184 brow = aj[j]; 185 bnzi = bi[brow+1] - bi[brow]; 186 bjj = bj + bi[brow]; 187 baj = ba + bi[brow]; 188 /* perform dense axpy */ 189 valtmp = aa[j]; 190 for (k=0; k<bnzi; k++) { 191 ab_dense[bjj[k]] += valtmp*baj[k]; 192 } 193 flops += 2*bnzi; 194 } 195 aj += anzi; aa += anzi; 196 197 cnzi = ci[i+1] - ci[i]; 198 for (k=0; k<cnzi; k++) { 199 ca[k] += ab_dense[cj[k]]; 200 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 201 } 202 flops += cnzi; 203 cj += cnzi; ca += cnzi; 204 } 205 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 206 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 207 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable" 213 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 214 { 215 PetscErrorCode ierr; 216 PetscLogDouble flops=0.0; 217 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 218 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 219 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 220 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 221 PetscInt am=A->rmap->N,cm=C->rmap->N; 222 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 223 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 224 PetscInt nextb; 225 226 PetscFunctionBegin; 227 /* clean old values in C */ 228 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 229 /* Traverse A row-wise. */ 230 /* Build the ith row in C by summing over nonzero columns in A, */ 231 /* the rows of B corresponding to nonzeros of A. */ 232 for (i=0;i<am;i++) { 233 anzi = ai[i+1] - ai[i]; 234 cnzi = ci[i+1] - ci[i]; 235 for (j=0;j<anzi;j++) { 236 brow = aj[j]; 237 bnzi = bi[brow+1] - bi[brow]; 238 bjj = bj + bi[brow]; 239 baj = ba + bi[brow]; 240 /* perform sparse axpy */ 241 valtmp = aa[j]; 242 nextb = 0; 243 for (k=0; nextb<bnzi; k++) { 244 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 245 ca[k] += valtmp*baj[nextb++]; 246 } 247 } 248 flops += 2*bnzi; 249 } 250 aj += anzi; aa += anzi; 251 cj += cnzi; ca += cnzi; 252 } 253 254 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 255 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 256 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast" 262 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat *C) 263 { 264 PetscErrorCode ierr; 265 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 266 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 267 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 268 MatScalar *ca; 269 PetscReal afill; 270 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0; 271 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 272 273 PetscFunctionBegin; 274 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 275 /*-----------------------------------------------------------------------------------------*/ 276 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 277 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 278 ci[0] = 0; 279 280 /* create and initialize a linked list */ 281 nlnk_max = a->rmax*b->rmax; 282 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */ 283 ierr = PetscLLCondensedCreate_fast(nlnk_max,&lnk);CHKERRQ(ierr); 284 285 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 286 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 287 current_space = free_space; 288 289 /* Determine ci and cj */ 290 for (i=0; i<am; i++) { 291 anzi = ai[i+1] - ai[i]; 292 aj = a->j + ai[i]; 293 for (j=0; j<anzi; j++){ 294 brow = aj[j]; 295 bnzj = bi[brow+1] - bi[brow]; 296 bj = b->j + bi[brow]; 297 /* add non-zero cols of B into the sorted linked list lnk */ 298 ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr); 299 } 300 cnzi = lnk[1]; 301 302 /* If free space is not available, make more free space */ 303 /* Double the amount of total space in the list */ 304 if (current_space->local_remaining<cnzi) { 305 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 306 ndouble++; 307 } 308 309 /* Copy data into free space, then initialize lnk */ 310 ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr); 311 current_space->array += cnzi; 312 current_space->local_used += cnzi; 313 current_space->local_remaining -= cnzi; 314 ci[i+1] = ci[i] + cnzi; 315 } 316 317 /* Column indices are in the list of free space */ 318 /* Allocate space for cj, initialize cj, and */ 319 /* destroy list of free space and other temporary array(s) */ 320 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 321 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 322 ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr); 323 324 /* Allocate space for ca */ 325 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 326 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 327 328 /* put together the new symbolic matrix */ 329 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 330 (*C)->rmap->bs = A->rmap->bs; 331 (*C)->cmap->bs = B->cmap->bs; 332 333 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 334 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 335 c = (Mat_SeqAIJ *)((*C)->data); 336 c->free_a = PETSC_TRUE; 337 c->free_ij = PETSC_TRUE; 338 c->nonew = 0; 339 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 340 341 /* set MatInfo */ 342 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 343 if (afill < 1.0) afill = 1.0; 344 c->maxnz = ci[am]; 345 c->nz = ci[am]; 346 (*C)->info.mallocs = ndouble; 347 (*C)->info.fill_ratio_given = fill; 348 (*C)->info.fill_ratio_needed = afill; 349 350 #if defined(PETSC_USE_INFO) 351 if (ci[am]) { 352 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 353 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 354 } else { 355 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 356 } 357 #endif 358 PetscFunctionReturn(0); 359 } 360 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable" 364 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) 365 { 366 PetscErrorCode ierr; 367 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 368 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 369 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 370 MatScalar *ca; 371 PetscReal afill; 372 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0; 373 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 374 375 PetscFunctionBegin; 376 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 377 /*---------------------------------------------------------------------------------------------*/ 378 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 379 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 380 ci[0] = 0; 381 382 /* create and initialize a linked list */ 383 nlnk_max = a->rmax*b->rmax; 384 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */ 385 ierr = PetscLLCondensedCreate_Scalable(nlnk_max,&lnk);CHKERRQ(ierr); 386 387 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 388 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 389 current_space = free_space; 390 391 /* Determine ci and cj */ 392 for (i=0; i<am; i++) { 393 anzi = ai[i+1] - ai[i]; 394 aj = a->j + ai[i]; 395 for (j=0; j<anzi; j++){ 396 brow = aj[j]; 397 bnzj = bi[brow+1] - bi[brow]; 398 bj = b->j + bi[brow]; 399 /* add non-zero cols of B into the sorted linked list lnk */ 400 ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr); 401 } 402 cnzi = lnk[0]; 403 404 /* If free space is not available, make more free space */ 405 /* Double the amount of total space in the list */ 406 if (current_space->local_remaining<cnzi) { 407 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 408 ndouble++; 409 } 410 411 /* Copy data into free space, then initialize lnk */ 412 ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr); 413 current_space->array += cnzi; 414 current_space->local_used += cnzi; 415 current_space->local_remaining -= cnzi; 416 ci[i+1] = ci[i] + cnzi; 417 } 418 419 /* Column indices are in the list of free space */ 420 /* Allocate space for cj, initialize cj, and */ 421 /* destroy list of free space and other temporary array(s) */ 422 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 423 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 424 ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr); 425 426 /* Allocate space for ca */ 427 /*-----------------------*/ 428 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 429 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 430 431 /* put together the new symbolic matrix */ 432 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 433 (*C)->rmap->bs = A->rmap->bs; 434 (*C)->cmap->bs = B->cmap->bs; 435 436 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 437 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 438 c = (Mat_SeqAIJ *)((*C)->data); 439 c->free_a = PETSC_TRUE; 440 c->free_ij = PETSC_TRUE; 441 c->nonew = 0; 442 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 443 444 /* set MatInfo */ 445 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 446 if (afill < 1.0) afill = 1.0; 447 c->maxnz = ci[am]; 448 c->nz = ci[am]; 449 (*C)->info.mallocs = ndouble; 450 (*C)->info.fill_ratio_given = fill; 451 (*C)->info.fill_ratio_needed = afill; 452 453 #if defined(PETSC_USE_INFO) 454 if (ci[am]) { 455 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 456 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 457 } else { 458 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 459 } 460 #endif 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap" 466 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat *C) 467 { 468 PetscErrorCode ierr; 469 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 470 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 471 PetscInt *ci,*cj,*bb; 472 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 473 PetscReal afill; 474 PetscInt i,j,col,ndouble = 0; 475 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 476 PetscHeap h; 477 478 PetscFunctionBegin; 479 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 480 /*---------------------------------------------------------------------------------------------*/ 481 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 482 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 483 ci[0] = 0; 484 485 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 486 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 487 current_space = free_space; 488 489 ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 490 ierr = PetscMalloc(a->rmax*sizeof(PetscInt),&bb);CHKERRQ(ierr); 491 492 /* Determine ci and cj */ 493 for (i=0; i<am; i++) { 494 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 495 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 496 ci[i+1] = ci[i]; 497 /* Populate the min heap */ 498 for (j=0; j<anzi; j++) { 499 bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 500 if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 501 ierr = PetscHeapAdd(h,j,bj[bb[j]++]);CHKERRQ(ierr); 502 } 503 } 504 /* Pick off the min element, adding it to free space */ 505 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 506 while (j >= 0) { 507 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 508 ierr = PetscFreeSpaceGet(PetscMin(2*current_space->total_array_size,16 << 20),¤t_space);CHKERRQ(ierr); 509 ndouble++; 510 } 511 *(current_space->array++) = col; 512 current_space->local_used++; 513 current_space->local_remaining--; 514 ci[i+1]++; 515 516 /* stash if anything else remains in this row of B */ 517 if (bb[j] < bi[acol[j]+1]) {ierr = PetscHeapStash(h,j,bj[bb[j]++]);CHKERRQ(ierr);} 518 while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 519 PetscInt j2,col2; 520 ierr = PetscHeapPeek(h,&j2,&col2);CHKERRQ(ierr); 521 if (col2 != col) break; 522 ierr = PetscHeapPop(h,&j2,&col2);CHKERRQ(ierr); 523 if (bb[j2] < bi[acol[j2]+1]) {ierr = PetscHeapStash(h,j2,bj[bb[j2]++]);CHKERRQ(ierr);} 524 } 525 /* Put any stashed elements back into the min heap */ 526 ierr = PetscHeapUnstash(h);CHKERRQ(ierr); 527 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 528 } 529 } 530 ierr = PetscFree(bb);CHKERRQ(ierr); 531 ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 532 533 /* Column indices are in the list of free space */ 534 /* Allocate space for cj, initialize cj, and */ 535 /* destroy list of free space and other temporary array(s) */ 536 ierr = PetscMalloc(ci[am]*sizeof(PetscInt),&cj);CHKERRQ(ierr); 537 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 538 539 /* put together the new symbolic matrix */ 540 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,PETSC_NULL,C);CHKERRQ(ierr); 541 (*C)->rmap->bs = A->rmap->bs; 542 (*C)->cmap->bs = B->cmap->bs; 543 544 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 545 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 546 c = (Mat_SeqAIJ *)((*C)->data); 547 c->free_a = PETSC_TRUE; 548 c->free_ij = PETSC_TRUE; 549 c->nonew = 0; 550 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 551 552 /* set MatInfo */ 553 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 554 if (afill < 1.0) afill = 1.0; 555 c->maxnz = ci[am]; 556 c->nz = ci[am]; 557 (*C)->info.mallocs = ndouble; 558 (*C)->info.fill_ratio_given = fill; 559 (*C)->info.fill_ratio_needed = afill; 560 561 #if defined(PETSC_USE_INFO) 562 if (ci[am]) { 563 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 564 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 565 } else { 566 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 567 } 568 #endif 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap" 574 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat *C) 575 { 576 PetscErrorCode ierr; 577 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 578 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 579 PetscInt *ci,*cj,*bb; 580 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 581 PetscReal afill; 582 PetscInt i,j,col,ndouble = 0; 583 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 584 PetscHeap h; 585 PetscBT bt; 586 587 PetscFunctionBegin; 588 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 589 /*---------------------------------------------------------------------------------------------*/ 590 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 591 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 592 ci[0] = 0; 593 594 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 595 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 596 current_space = free_space; 597 598 ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 599 ierr = PetscMalloc(a->rmax*sizeof(PetscInt),&bb);CHKERRQ(ierr); 600 ierr = PetscBTCreate(bn,&bt);CHKERRQ(ierr); 601 602 /* Determine ci and cj */ 603 for (i=0; i<am; i++) { 604 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 605 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 606 const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 607 ci[i+1] = ci[i]; 608 /* Populate the min heap */ 609 for (j=0; j<anzi; j++) { 610 PetscInt brow = acol[j]; 611 for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 612 PetscInt bcol = bj[bb[j]]; 613 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 614 ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 615 bb[j]++; 616 break; 617 } 618 } 619 } 620 /* Pick off the min element, adding it to free space */ 621 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 622 while (j >= 0) { 623 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 624 fptr = PETSC_NULL; /* need PetscBTMemzero */ 625 ierr = PetscFreeSpaceGet(PetscMin(2*current_space->total_array_size,16 << 20),¤t_space);CHKERRQ(ierr); 626 ndouble++; 627 } 628 *(current_space->array++) = col; 629 current_space->local_used++; 630 current_space->local_remaining--; 631 ci[i+1]++; 632 633 /* stash if anything else remains in this row of B */ 634 for ( ; bb[j] < bi[acol[j]+1]; bb[j]++) { 635 PetscInt bcol = bj[bb[j]]; 636 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 637 ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 638 bb[j]++; 639 break; 640 } 641 } 642 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 643 } 644 if (fptr) { /* Clear the bits for this row */ 645 for ( ; fptr<current_space->array; fptr++) {ierr = PetscBTClear(bt,*fptr);CHKERRQ(ierr);} 646 } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 647 ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 648 } 649 } 650 ierr = PetscFree(bb);CHKERRQ(ierr); 651 ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 652 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 653 654 /* Column indices are in the list of free space */ 655 /* Allocate space for cj, initialize cj, and */ 656 /* destroy list of free space and other temporary array(s) */ 657 ierr = PetscMalloc(ci[am]*sizeof(PetscInt),&cj);CHKERRQ(ierr); 658 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 659 660 /* put together the new symbolic matrix */ 661 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,PETSC_NULL,C);CHKERRQ(ierr); 662 (*C)->rmap->bs = A->rmap->bs; 663 (*C)->cmap->bs = B->cmap->bs; 664 665 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 666 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 667 c = (Mat_SeqAIJ *)((*C)->data); 668 c->free_a = PETSC_TRUE; 669 c->free_ij = PETSC_TRUE; 670 c->nonew = 0; 671 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 672 673 /* set MatInfo */ 674 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 675 if (afill < 1.0) afill = 1.0; 676 c->maxnz = ci[am]; 677 c->nz = ci[am]; 678 (*C)->info.mallocs = ndouble; 679 (*C)->info.fill_ratio_given = fill; 680 (*C)->info.fill_ratio_needed = afill; 681 682 #if defined(PETSC_USE_INFO) 683 if (ci[am]) { 684 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 685 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 686 } else { 687 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 688 } 689 #endif 690 PetscFunctionReturn(0); 691 } 692 693 /* This routine is not used. Should be removed! */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 696 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 697 { 698 PetscErrorCode ierr; 699 700 PetscFunctionBegin; 701 if (scall == MAT_INITIAL_MATRIX){ 702 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 703 } 704 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 710 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 711 { 712 PetscErrorCode ierr; 713 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 714 715 PetscFunctionBegin; 716 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 717 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 718 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 719 ierr = PetscFree(multtrans);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 725 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 726 { 727 PetscErrorCode ierr; 728 PetscContainer container; 729 Mat_MatMatTransMult *multtrans=PETSC_NULL; 730 731 PetscFunctionBegin; 732 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 733 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 734 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 735 A->ops->destroy = multtrans->destroy; 736 if (A->ops->destroy) { 737 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 738 } 739 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 745 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 746 { 747 PetscErrorCode ierr; 748 Mat Bt; 749 PetscInt *bti,*btj; 750 Mat_MatMatTransMult *multtrans; 751 PetscContainer container; 752 753 PetscFunctionBegin; 754 /* create symbolic Bt */ 755 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 756 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 757 Bt->rmap->bs = A->cmap->bs; 758 Bt->cmap->bs = B->cmap->bs; 759 760 /* get symbolic C=A*Bt */ 761 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 762 763 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 764 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 765 766 /* attach the supporting struct to C */ 767 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 768 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 769 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 770 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 771 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 772 773 multtrans->usecoloring = PETSC_FALSE; 774 multtrans->destroy = (*C)->ops->destroy; 775 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 776 777 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 778 if (multtrans->usecoloring){ 779 /* Create MatTransposeColoring from symbolic C=A*B^T */ 780 MatTransposeColoring matcoloring; 781 ISColoring iscoloring; 782 Mat Bt_dense,C_dense; 783 784 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 785 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 786 multtrans->matcoloring = matcoloring; 787 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 788 789 /* Create Bt_dense and C_dense = A*Bt_dense */ 790 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 791 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 792 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 793 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 794 Bt_dense->assembled = PETSC_TRUE; 795 multtrans->Bt_den = Bt_dense; 796 797 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 798 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 799 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 800 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 801 Bt_dense->assembled = PETSC_TRUE; 802 multtrans->ABt_den = C_dense; 803 804 #if defined(PETSC_USE_INFO) 805 { 806 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 807 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 808 } 809 #endif 810 } 811 /* clean up */ 812 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 813 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 814 815 816 817 #if defined(INEFFICIENT_ALGORITHM) 818 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 819 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 820 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 821 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 822 PetscInt am=A->rmap->N,bm=B->rmap->N; 823 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 824 MatScalar *ca; 825 PetscBT lnkbt; 826 PetscReal afill; 827 828 /* Allocate row pointer array ci */ 829 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 830 ci[0] = 0; 831 832 /* Create and initialize a linked list for C columns */ 833 nlnk = bm+1; 834 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 835 836 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 837 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 838 current_space = free_space; 839 840 /* Determine symbolic info for each row of the product A*B^T: */ 841 for (i=0; i<am; i++) { 842 anzi = ai[i+1] - ai[i]; 843 cnzi = 0; 844 acol = aj + ai[i]; 845 for (j=0; j<bm; j++){ 846 bnzj = bi[j+1] - bi[j]; 847 bcol= bj + bi[j]; 848 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 849 ka = 0; kb = 0; 850 while (ka < anzi && kb < bnzj){ 851 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 852 if (ka == anzi) break; 853 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 854 if (kb == bnzj) break; 855 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 856 index[0] = j; 857 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 858 cnzi++; 859 break; 860 } 861 } 862 } 863 864 /* If free space is not available, make more free space */ 865 /* Double the amount of total space in the list */ 866 if (current_space->local_remaining<cnzi) { 867 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 868 nspacedouble++; 869 } 870 871 /* Copy data into free space, then initialize lnk */ 872 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 873 current_space->array += cnzi; 874 current_space->local_used += cnzi; 875 current_space->local_remaining -= cnzi; 876 877 ci[i+1] = ci[i] + cnzi; 878 } 879 880 881 /* Column indices are in the list of free space. 882 Allocate array cj, copy column indices to cj, and destroy list of free space */ 883 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 884 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 885 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 886 887 /* Allocate space for ca */ 888 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 889 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 890 891 /* put together the new symbolic matrix */ 892 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 893 (*C)->rmap->bs = A->cmap->bs; 894 (*C)->cmap->bs = B->cmap->bs; 895 896 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 897 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 898 c = (Mat_SeqAIJ *)((*C)->data); 899 c->free_a = PETSC_TRUE; 900 c->free_ij = PETSC_TRUE; 901 c->nonew = 0; 902 903 /* set MatInfo */ 904 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 905 if (afill < 1.0) afill = 1.0; 906 c->maxnz = ci[am]; 907 c->nz = ci[am]; 908 (*C)->info.mallocs = nspacedouble; 909 (*C)->info.fill_ratio_given = fill; 910 (*C)->info.fill_ratio_needed = afill; 911 912 #if defined(PETSC_USE_INFO) 913 if (ci[am]) { 914 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 915 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 916 } else { 917 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 918 } 919 #endif 920 #endif 921 PetscFunctionReturn(0); 922 } 923 924 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 927 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 928 { 929 PetscErrorCode ierr; 930 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 931 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 932 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 933 PetscLogDouble flops=0.0; 934 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca,*cval; 935 Mat_MatMatTransMult *multtrans; 936 PetscContainer container; 937 #if defined(USE_ARRAY) 938 MatScalar *spdot; 939 #endif 940 941 PetscFunctionBegin; 942 /* clear old values in C */ 943 if (!c->a){ 944 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 945 c->a = ca; 946 c->free_a = PETSC_TRUE; 947 } else { 948 ca = c->a; 949 } 950 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 951 952 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 953 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 954 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 955 if (multtrans->usecoloring){ 956 MatTransposeColoring matcoloring = multtrans->matcoloring; 957 Mat Bt_dense; 958 PetscInt m,n; 959 Mat C_dense = multtrans->ABt_den; 960 961 Bt_dense = multtrans->Bt_den; 962 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 963 964 /* Get Bt_dense by Apply MatTransposeColoring to B */ 965 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 966 967 /* C_dense = A*Bt_dense */ 968 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 969 970 /* Recover C from C_dense */ 971 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #if defined(USE_ARRAY) 976 /* allocate an array for implementing sparse inner-product */ 977 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 978 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 979 #endif 980 981 for (i=0; i<cm; i++) { 982 anzi = ai[i+1] - ai[i]; 983 acol = aj + ai[i]; 984 aval = aa + ai[i]; 985 cnzi = ci[i+1] - ci[i]; 986 ccol = cj + ci[i]; 987 cval = ca + ci[i]; 988 for (j=0; j<cnzi; j++){ 989 brow = ccol[j]; 990 bnzj = bi[brow+1] - bi[brow]; 991 bcol = bj + bi[brow]; 992 bval = ba + bi[brow]; 993 994 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 995 #if defined(USE_ARRAY) 996 /* put ba to spdot array */ 997 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 998 /* c(i,j)=A[i,:]*B[j,:]^T */ 999 for (nexta=0; nexta<anzi; nexta++){ 1000 cval[j] += spdot[acol[nexta]]*aval[nexta]; 1001 } 1002 /* zero spdot array */ 1003 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 1004 #else 1005 nexta = 0; nextb = 0; 1006 while (nexta<anzi && nextb<bnzj){ 1007 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 1008 if (nexta == anzi) break; 1009 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 1010 if (nextb == bnzj) break; 1011 if (acol[nexta] == bcol[nextb]){ 1012 cval[j] += aval[nexta]*bval[nextb]; 1013 nexta++; nextb++; 1014 flops += 2; 1015 } 1016 } 1017 #endif 1018 } 1019 } 1020 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1021 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1022 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1023 #if defined(USE_ARRAY) 1024 ierr = PetscFree(spdot); 1025 #endif 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 1031 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 1032 PetscErrorCode ierr; 1033 1034 PetscFunctionBegin; 1035 if (scall == MAT_INITIAL_MATRIX){ 1036 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 1037 } 1038 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 1044 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 1045 { 1046 PetscErrorCode ierr; 1047 Mat At; 1048 PetscInt *ati,*atj; 1049 1050 PetscFunctionBegin; 1051 /* create symbolic At */ 1052 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1053 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 1054 At->rmap->bs = A->cmap->bs; 1055 At->cmap->bs = B->cmap->bs; 1056 1057 /* get symbolic C=At*B */ 1058 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 1059 1060 /* clean up */ 1061 ierr = MatDestroy(&At);CHKERRQ(ierr); 1062 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 1068 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1069 { 1070 PetscErrorCode ierr; 1071 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1072 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1073 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1074 PetscLogDouble flops=0.0; 1075 MatScalar *aa=a->a,*ba,*ca,*caj; 1076 1077 PetscFunctionBegin; 1078 if (!c->a){ 1079 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1080 c->a = ca; 1081 c->free_a = PETSC_TRUE; 1082 } else { 1083 ca = c->a; 1084 } 1085 /* clear old values in C */ 1086 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1087 1088 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1089 for (i=0;i<am;i++) { 1090 bj = b->j + bi[i]; 1091 ba = b->a + bi[i]; 1092 bnzi = bi[i+1] - bi[i]; 1093 anzi = ai[i+1] - ai[i]; 1094 for (j=0; j<anzi; j++) { 1095 nextb = 0; 1096 crow = *aj++; 1097 cjj = cj + ci[crow]; 1098 caj = ca + ci[crow]; 1099 /* perform sparse axpy operation. Note cjj includes bj. */ 1100 for (k=0; nextb<bnzi; k++) { 1101 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 1102 caj[k] += (*aa)*(*(ba+nextb)); 1103 nextb++; 1104 } 1105 } 1106 flops += 2*bnzi; 1107 aa++; 1108 } 1109 } 1110 1111 /* Assemble the final matrix and clean up */ 1112 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1113 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1114 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 EXTERN_C_BEGIN 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 1121 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1122 { 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 if (scall == MAT_INITIAL_MATRIX){ 1127 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 1128 } 1129 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 EXTERN_C_END 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 1136 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 1137 { 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 1142 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 1148 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1149 { 1150 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1151 PetscErrorCode ierr; 1152 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1153 MatScalar *aa; 1154 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 1155 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 1156 1157 PetscFunctionBegin; 1158 if (!cm || !cn) PetscFunctionReturn(0); 1159 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 1160 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1161 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1162 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1163 ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr); 1164 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1165 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1166 colam = col*am; 1167 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1168 r1 = r2 = r3 = r4 = 0.0; 1169 n = a->i[i+1] - a->i[i]; 1170 aj = a->j + a->i[i]; 1171 aa = a->a + a->i[i]; 1172 for (j=0; j<n; j++) { 1173 r1 += (*aa)*b1[*aj]; 1174 r2 += (*aa)*b2[*aj]; 1175 r3 += (*aa)*b3[*aj]; 1176 r4 += (*aa++)*b4[*aj++]; 1177 } 1178 c[colam + i] = r1; 1179 c[colam + am + i] = r2; 1180 c[colam + am2 + i] = r3; 1181 c[colam + am3 + i] = r4; 1182 } 1183 b1 += bm4; 1184 b2 += bm4; 1185 b3 += bm4; 1186 b4 += bm4; 1187 } 1188 for (;col<cn; col++){ /* over extra columns of C */ 1189 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1190 r1 = 0.0; 1191 n = a->i[i+1] - a->i[i]; 1192 aj = a->j + a->i[i]; 1193 aa = a->a + a->i[i]; 1194 1195 for (j=0; j<n; j++) { 1196 r1 += (*aa++)*b1[*aj++]; 1197 } 1198 c[col*am + i] = r1; 1199 } 1200 b1 += bm; 1201 } 1202 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1203 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1204 ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr); 1205 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1206 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 /* 1211 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 1212 */ 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 1215 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1216 { 1217 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1218 PetscErrorCode ierr; 1219 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1220 MatScalar *aa; 1221 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 1222 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 1223 1224 PetscFunctionBegin; 1225 if (!cm || !cn) PetscFunctionReturn(0); 1226 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1227 ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr); 1228 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1229 1230 if (a->compressedrow.use){ /* use compressed row format */ 1231 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1232 colam = col*am; 1233 arm = a->compressedrow.nrows; 1234 ii = a->compressedrow.i; 1235 ridx = a->compressedrow.rindex; 1236 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1237 r1 = r2 = r3 = r4 = 0.0; 1238 n = ii[i+1] - ii[i]; 1239 aj = a->j + ii[i]; 1240 aa = a->a + ii[i]; 1241 for (j=0; j<n; j++) { 1242 r1 += (*aa)*b1[*aj]; 1243 r2 += (*aa)*b2[*aj]; 1244 r3 += (*aa)*b3[*aj]; 1245 r4 += (*aa++)*b4[*aj++]; 1246 } 1247 c[colam + ridx[i]] += r1; 1248 c[colam + am + ridx[i]] += r2; 1249 c[colam + am2 + ridx[i]] += r3; 1250 c[colam + am3 + ridx[i]] += r4; 1251 } 1252 b1 += bm4; 1253 b2 += bm4; 1254 b3 += bm4; 1255 b4 += bm4; 1256 } 1257 for (;col<cn; col++){ /* over extra columns of C */ 1258 colam = col*am; 1259 arm = a->compressedrow.nrows; 1260 ii = a->compressedrow.i; 1261 ridx = a->compressedrow.rindex; 1262 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1263 r1 = 0.0; 1264 n = ii[i+1] - ii[i]; 1265 aj = a->j + ii[i]; 1266 aa = a->a + ii[i]; 1267 1268 for (j=0; j<n; j++) { 1269 r1 += (*aa++)*b1[*aj++]; 1270 } 1271 c[colam + ridx[i]] += r1; 1272 } 1273 b1 += bm; 1274 } 1275 } else { 1276 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1277 colam = col*am; 1278 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1279 r1 = r2 = r3 = r4 = 0.0; 1280 n = a->i[i+1] - a->i[i]; 1281 aj = a->j + a->i[i]; 1282 aa = a->a + a->i[i]; 1283 for (j=0; j<n; j++) { 1284 r1 += (*aa)*b1[*aj]; 1285 r2 += (*aa)*b2[*aj]; 1286 r3 += (*aa)*b3[*aj]; 1287 r4 += (*aa++)*b4[*aj++]; 1288 } 1289 c[colam + i] += r1; 1290 c[colam + am + i] += r2; 1291 c[colam + am2 + i] += r3; 1292 c[colam + am3 + i] += r4; 1293 } 1294 b1 += bm4; 1295 b2 += bm4; 1296 b3 += bm4; 1297 b4 += bm4; 1298 } 1299 for (;col<cn; col++){ /* over extra columns of C */ 1300 colam = col*am; 1301 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1302 r1 = 0.0; 1303 n = a->i[i+1] - a->i[i]; 1304 aj = a->j + a->i[i]; 1305 aa = a->a + a->i[i]; 1306 1307 for (j=0; j<n; j++) { 1308 r1 += (*aa++)*b1[*aj++]; 1309 } 1310 c[colam + i] += r1; 1311 } 1312 b1 += bm; 1313 } 1314 } 1315 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1316 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1317 ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1323 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1324 { 1325 PetscErrorCode ierr; 1326 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1327 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1328 PetscInt *bi=b->i,*bj=b->j; 1329 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1330 MatScalar *btval,*btval_den,*ba=b->a; 1331 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1332 1333 PetscFunctionBegin; 1334 btval_den=btdense->v; 1335 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1336 for (k=0; k<ncolors; k++) { 1337 ncolumns = coloring->ncolumns[k]; 1338 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1339 col = *(columns + colorforcol[k] + l); 1340 btcol = bj + bi[col]; 1341 btval = ba + bi[col]; 1342 anz = bi[col+1] - bi[col]; 1343 for (j=0; j<anz; j++){ 1344 brow = btcol[j]; 1345 btval_den[brow] = btval[j]; 1346 } 1347 } 1348 btval_den += m; 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1355 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1356 { 1357 PetscErrorCode ierr; 1358 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1359 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1360 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1361 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1362 1363 PetscFunctionBegin; 1364 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1365 ierr = MatDenseGetArray(Cden,&ca_den);CHKERRQ(ierr); 1366 cp_den = ca_den; 1367 for (k=0; k<ncolors; k++) { 1368 nrows = matcoloring->nrows[k]; 1369 row = rows + colorforrow[k]; 1370 idx = spidx + colorforrow[k]; 1371 for (l=0; l<nrows; l++){ 1372 ca[idx[l]] = cp_den[row[l]]; 1373 } 1374 cp_den += m; 1375 } 1376 ierr = MatDenseRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 /* 1381 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1382 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1383 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1384 */ 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1387 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1388 { 1389 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1390 PetscErrorCode ierr; 1391 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1392 PetscInt nz = a->i[m],row,*jj,mr,col; 1393 PetscInt *cspidx; 1394 1395 PetscFunctionBegin; 1396 *nn = n; 1397 if (!ia) PetscFunctionReturn(0); 1398 if (symmetric) { 1399 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1400 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,(PetscInt**)ia,(PetscInt**)ja);CHKERRQ(ierr); 1401 } else { 1402 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1403 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1404 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1405 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1406 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1407 jj = a->j; 1408 for (i=0; i<nz; i++) { 1409 collengths[jj[i]]++; 1410 } 1411 cia[0] = oshift; 1412 for (i=0; i<n; i++) { 1413 cia[i+1] = cia[i] + collengths[i]; 1414 } 1415 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1416 jj = a->j; 1417 for (row=0; row<m; row++) { 1418 mr = a->i[row+1] - a->i[row]; 1419 for (i=0; i<mr; i++) { 1420 col = *jj++; 1421 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1422 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1423 } 1424 } 1425 ierr = PetscFree(collengths);CHKERRQ(ierr); 1426 *ia = cia; *ja = cja; 1427 *spidx = cspidx; 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1434 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1435 { 1436 PetscErrorCode ierr; 1437 1438 PetscFunctionBegin; 1439 if (!ia) PetscFunctionReturn(0); 1440 1441 ierr = PetscFree(*ia);CHKERRQ(ierr); 1442 ierr = PetscFree(*ja);CHKERRQ(ierr); 1443 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1449 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1450 { 1451 PetscErrorCode ierr; 1452 PetscInt i,n,nrows,N,j,k,m,ncols,col,cm; 1453 const PetscInt *is,*ci,*cj,*row_idx; 1454 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1455 IS *isa; 1456 PetscBool flg1,flg2; 1457 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1458 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1459 PetscInt *colorforcol,*columns,*columns_i; 1460 1461 PetscFunctionBegin; 1462 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1463 1464 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1465 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1466 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1467 if (flg1 || flg2) { 1468 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1469 } 1470 1471 N = mat->cmap->N/bs; 1472 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1473 c->N = mat->cmap->N/bs; 1474 c->m = mat->rmap->N/bs; 1475 c->rstart = 0; 1476 1477 c->ncolors = nis; 1478 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1479 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1480 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1481 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1482 colorforrow[0] = 0; 1483 rows_i = rows; 1484 columnsforspidx_i = columnsforspidx; 1485 1486 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1487 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1488 colorforcol[0] = 0; 1489 columns_i = columns; 1490 1491 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,PETSC_NULL);CHKERRQ(ierr); /* column-wise storage of mat */ 1492 1493 cm = c->m; 1494 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1495 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1496 for (i=0; i<nis; i++) { 1497 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1498 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1499 c->ncolumns[i] = n; 1500 if (n) { 1501 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1502 } 1503 colorforcol[i+1] = colorforcol[i] + n; 1504 columns_i += n; 1505 1506 /* fast, crude version requires O(N*N) work */ 1507 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1508 1509 /* loop over columns*/ 1510 for (j=0; j<n; j++) { 1511 col = is[j]; 1512 row_idx = cj + ci[col]; 1513 m = ci[col+1] - ci[col]; 1514 /* loop over columns marking them in rowhit */ 1515 for (k=0; k<m; k++) { 1516 idxhit[*row_idx] = spidx[ci[col] + k]; 1517 rowhit[*row_idx++] = col + 1; 1518 } 1519 } 1520 /* count the number of hits */ 1521 nrows = 0; 1522 for (j=0; j<cm; j++) { 1523 if (rowhit[j]) nrows++; 1524 } 1525 c->nrows[i] = nrows; 1526 colorforrow[i+1] = colorforrow[i] + nrows; 1527 1528 nrows = 0; 1529 for (j=0; j<cm; j++) { 1530 if (rowhit[j]) { 1531 rows_i[nrows] = j; 1532 columnsforspidx_i[nrows] = idxhit[j]; 1533 nrows++; 1534 } 1535 } 1536 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1537 rows_i += nrows; columnsforspidx_i += nrows; 1538 } 1539 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,PETSC_NULL);CHKERRQ(ierr); 1540 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1541 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1542 #if defined(PETSC_USE_DEBUG) 1543 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1544 #endif 1545 1546 c->colorforrow = colorforrow; 1547 c->rows = rows; 1548 c->columnsforspidx = columnsforspidx; 1549 c->colorforcol = colorforcol; 1550 c->columns = columns; 1551 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554