1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <../src/mat/utils/petscheap.h> 10 #include <petscbt.h> 11 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 12 /* 13 #define DEBUG_MATMATMULT 14 */ 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 18 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 19 { 20 PetscErrorCode ierr; 21 PetscBool scalable=PETSC_FALSE,scalable_fast=PETSC_FALSE,heap = PETSC_FALSE,btheap = PETSC_FALSE; 22 23 PetscFunctionBegin; 24 if (scall == MAT_INITIAL_MATRIX){ 25 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 26 ierr = PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,PETSC_NULL);CHKERRQ(ierr); 27 ierr = PetscOptionsBool("-matmatmult_scalable_fast","Use a scalable but slower C=A*B","",scalable_fast,&scalable_fast,PETSC_NULL);CHKERRQ(ierr); 28 ierr = PetscOptionsBool("-matmatmult_heap","Use heap implementation of symbolic factorization C=A*B","",heap,&heap,PETSC_NULL);CHKERRQ(ierr); 29 ierr = PetscOptionsBool("-matmatmult_btheap","Use btheap implementation of symbolic factorization C=A*B","",btheap,&btheap,PETSC_NULL);CHKERRQ(ierr); 30 ierr = PetscOptionsEnd();CHKERRQ(ierr); 31 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 32 if (scalable_fast){ 33 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);CHKERRQ(ierr); 34 } else if (scalable){ 35 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 36 } else if (heap) { 37 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr); 38 } else if (btheap) { 39 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr); 40 } else { 41 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 42 } 43 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 44 } 45 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 46 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 47 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 53 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 54 { 55 PetscErrorCode ierr; 56 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 57 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 58 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 59 PetscReal afill; 60 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0; 61 PetscBT lnkbt; 62 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 63 64 PetscFunctionBegin; 65 /* Get ci and cj */ 66 /*---------------*/ 67 /* Allocate ci array, arrays for fill computation and */ 68 /* free space for accumulating nonzero column info */ 69 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 70 ci[0] = 0; 71 72 /* create and initialize a linked list */ 73 nlnk_max = a->rmax*b->rmax; 74 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; 75 ierr = PetscLLCondensedCreate(nlnk_max,bn,&lnk,&lnkbt);CHKERRQ(ierr); 76 77 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 78 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 79 current_space = free_space; 80 81 /* Determine ci and cj */ 82 for (i=0; i<am; i++) { 83 anzi = ai[i+1] - ai[i]; 84 aj = a->j + ai[i]; 85 for (j=0; j<anzi; j++){ 86 brow = aj[j]; 87 bnzj = bi[brow+1] - bi[brow]; 88 bj = b->j + bi[brow]; 89 /* add non-zero cols of B into the sorted linked list lnk */ 90 ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr); 91 } 92 cnzi = lnk[0]; 93 94 /* If free space is not available, make more free space */ 95 /* Double the amount of total space in the list */ 96 if (current_space->local_remaining<cnzi) { 97 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 98 ndouble++; 99 } 100 101 /* Copy data into free space, then initialize lnk */ 102 ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr); 103 current_space->array += cnzi; 104 current_space->local_used += cnzi; 105 current_space->local_remaining -= cnzi; 106 ci[i+1] = ci[i] + cnzi; 107 } 108 109 /* Column indices are in the list of free space */ 110 /* Allocate space for cj, initialize cj, and */ 111 /* destroy list of free space and other temporary array(s) */ 112 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 113 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 114 ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr); 115 116 /* put together the new symbolic matrix */ 117 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,PETSC_NULL,C);CHKERRQ(ierr); 118 (*C)->rmap->bs = A->rmap->bs; 119 (*C)->cmap->bs = B->cmap->bs; 120 121 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 122 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 123 c = (Mat_SeqAIJ *)((*C)->data); 124 c->free_a = PETSC_FALSE; 125 c->free_ij = PETSC_TRUE; 126 c->nonew = 0; 127 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */ 128 129 /* set MatInfo */ 130 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 131 if (afill < 1.0) afill = 1.0; 132 c->maxnz = ci[am]; 133 c->nz = ci[am]; 134 (*C)->info.mallocs = ndouble; 135 (*C)->info.fill_ratio_given = fill; 136 (*C)->info.fill_ratio_needed = afill; 137 138 #if defined(PETSC_USE_INFO) 139 if (ci[am]) { 140 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 141 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 142 } else { 143 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 144 } 145 #endif 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 151 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 152 { 153 PetscErrorCode ierr; 154 PetscLogDouble flops=0.0; 155 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 156 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 157 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 158 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 159 PetscInt am=A->rmap->n,cm=C->rmap->n; 160 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 161 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp; 162 PetscScalar *ab_dense; 163 164 PetscFunctionBegin; 165 /* printf("MatMatMultNumeric_SeqAIJ_SeqAIJ...ca %p\n",c->a); */ 166 if (!c->a){ /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 167 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 168 c->a = ca; 169 c->free_a = PETSC_TRUE; 170 171 ierr = PetscMalloc(B->cmap->N*sizeof(PetscScalar),&ab_dense);CHKERRQ(ierr); 172 ierr = PetscMemzero(ab_dense,B->cmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 173 c->matmult_abdense = ab_dense; 174 } else { 175 ca = c->a; 176 ab_dense = c->matmult_abdense; 177 } 178 179 /* clean old values in C */ 180 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 181 /* Traverse A row-wise. */ 182 /* Build the ith row in C by summing over nonzero columns in A, */ 183 /* the rows of B corresponding to nonzeros of A. */ 184 for (i=0; i<am; i++) { 185 anzi = ai[i+1] - ai[i]; 186 for (j=0; j<anzi; j++) { 187 brow = aj[j]; 188 bnzi = bi[brow+1] - bi[brow]; 189 bjj = bj + bi[brow]; 190 baj = ba + bi[brow]; 191 /* perform dense axpy */ 192 valtmp = aa[j]; 193 for (k=0; k<bnzi; k++) { 194 ab_dense[bjj[k]] += valtmp*baj[k]; 195 } 196 flops += 2*bnzi; 197 } 198 aj += anzi; aa += anzi; 199 200 cnzi = ci[i+1] - ci[i]; 201 for (k=0; k<cnzi; k++) { 202 ca[k] += ab_dense[cj[k]]; 203 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 204 } 205 flops += cnzi; 206 cj += cnzi; ca += cnzi; 207 } 208 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 209 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 210 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable" 216 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 217 { 218 PetscErrorCode ierr; 219 PetscLogDouble flops=0.0; 220 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 221 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 222 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 223 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 224 PetscInt am=A->rmap->N,cm=C->rmap->N; 225 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 226 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 227 PetscInt nextb; 228 229 PetscFunctionBegin; 230 #if defined(DEBUG_MATMATMULT) 231 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable...\n");CHKERRQ(ierr); 232 #endif 233 /* clean old values in C */ 234 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 235 /* Traverse A row-wise. */ 236 /* Build the ith row in C by summing over nonzero columns in A, */ 237 /* the rows of B corresponding to nonzeros of A. */ 238 for (i=0;i<am;i++) { 239 anzi = ai[i+1] - ai[i]; 240 cnzi = ci[i+1] - ci[i]; 241 for (j=0;j<anzi;j++) { 242 brow = aj[j]; 243 bnzi = bi[brow+1] - bi[brow]; 244 bjj = bj + bi[brow]; 245 baj = ba + bi[brow]; 246 /* perform sparse axpy */ 247 valtmp = aa[j]; 248 nextb = 0; 249 for (k=0; nextb<bnzi; k++) { 250 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 251 ca[k] += valtmp*baj[nextb++]; 252 } 253 } 254 flops += 2*bnzi; 255 } 256 aj += anzi; aa += anzi; 257 cj += cnzi; ca += cnzi; 258 } 259 260 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 261 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 262 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast" 268 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat *C) 269 { 270 PetscErrorCode ierr; 271 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 272 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 273 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 274 MatScalar *ca; 275 PetscReal afill; 276 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0; 277 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 278 279 PetscFunctionBegin; 280 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 281 /*-----------------------------------------------------------------------------------------*/ 282 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 283 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 284 ci[0] = 0; 285 286 /* create and initialize a linked list */ 287 nlnk_max = a->rmax*b->rmax; 288 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */ 289 ierr = PetscLLCondensedCreate_fast(nlnk_max,&lnk);CHKERRQ(ierr); 290 291 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 292 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 293 current_space = free_space; 294 295 /* Determine ci and cj */ 296 for (i=0; i<am; i++) { 297 anzi = ai[i+1] - ai[i]; 298 aj = a->j + ai[i]; 299 for (j=0; j<anzi; j++){ 300 brow = aj[j]; 301 bnzj = bi[brow+1] - bi[brow]; 302 bj = b->j + bi[brow]; 303 /* add non-zero cols of B into the sorted linked list lnk */ 304 ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr); 305 } 306 cnzi = lnk[1]; 307 308 /* If free space is not available, make more free space */ 309 /* Double the amount of total space in the list */ 310 if (current_space->local_remaining<cnzi) { 311 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 312 ndouble++; 313 } 314 315 /* Copy data into free space, then initialize lnk */ 316 ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr); 317 current_space->array += cnzi; 318 current_space->local_used += cnzi; 319 current_space->local_remaining -= cnzi; 320 ci[i+1] = ci[i] + cnzi; 321 } 322 323 /* Column indices are in the list of free space */ 324 /* Allocate space for cj, initialize cj, and */ 325 /* destroy list of free space and other temporary array(s) */ 326 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 327 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 328 ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr); 329 330 /* Allocate space for ca */ 331 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 332 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 333 334 /* put together the new symbolic matrix */ 335 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 336 (*C)->rmap->bs = A->rmap->bs; 337 (*C)->cmap->bs = B->cmap->bs; 338 339 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 340 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 341 c = (Mat_SeqAIJ *)((*C)->data); 342 c->free_a = PETSC_TRUE; 343 c->free_ij = PETSC_TRUE; 344 c->nonew = 0; 345 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 346 347 /* set MatInfo */ 348 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 349 if (afill < 1.0) afill = 1.0; 350 c->maxnz = ci[am]; 351 c->nz = ci[am]; 352 (*C)->info.mallocs = ndouble; 353 (*C)->info.fill_ratio_given = fill; 354 (*C)->info.fill_ratio_needed = afill; 355 356 #if defined(PETSC_USE_INFO) 357 if (ci[am]) { 358 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 359 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 360 } else { 361 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 362 } 363 #endif 364 PetscFunctionReturn(0); 365 } 366 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable" 370 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) 371 { 372 PetscErrorCode ierr; 373 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 374 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 375 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 376 MatScalar *ca; 377 PetscReal afill; 378 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0; 379 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 380 381 PetscFunctionBegin; 382 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 383 /*---------------------------------------------------------------------------------------------*/ 384 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 385 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 386 ci[0] = 0; 387 388 /* create and initialize a linked list */ 389 nlnk_max = a->rmax*b->rmax; 390 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */ 391 ierr = PetscLLCondensedCreate_Scalable(nlnk_max,&lnk);CHKERRQ(ierr); 392 393 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 394 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 395 current_space = free_space; 396 397 /* Determine ci and cj */ 398 for (i=0; i<am; i++) { 399 anzi = ai[i+1] - ai[i]; 400 aj = a->j + ai[i]; 401 for (j=0; j<anzi; j++){ 402 brow = aj[j]; 403 bnzj = bi[brow+1] - bi[brow]; 404 bj = b->j + bi[brow]; 405 /* add non-zero cols of B into the sorted linked list lnk */ 406 ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr); 407 } 408 cnzi = lnk[0]; 409 410 /* If free space is not available, make more free space */ 411 /* Double the amount of total space in the list */ 412 if (current_space->local_remaining<cnzi) { 413 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 414 ndouble++; 415 } 416 417 /* Copy data into free space, then initialize lnk */ 418 ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr); 419 current_space->array += cnzi; 420 current_space->local_used += cnzi; 421 current_space->local_remaining -= cnzi; 422 ci[i+1] = ci[i] + cnzi; 423 } 424 425 /* Column indices are in the list of free space */ 426 /* Allocate space for cj, initialize cj, and */ 427 /* destroy list of free space and other temporary array(s) */ 428 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 429 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 430 ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr); 431 432 /* Allocate space for ca */ 433 /*-----------------------*/ 434 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 435 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 436 437 /* put together the new symbolic matrix */ 438 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 439 (*C)->rmap->bs = A->rmap->bs; 440 (*C)->cmap->bs = B->cmap->bs; 441 442 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 443 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 444 c = (Mat_SeqAIJ *)((*C)->data); 445 c->free_a = PETSC_TRUE; 446 c->free_ij = PETSC_TRUE; 447 c->nonew = 0; 448 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 449 450 /* set MatInfo */ 451 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 452 if (afill < 1.0) afill = 1.0; 453 c->maxnz = ci[am]; 454 c->nz = ci[am]; 455 (*C)->info.mallocs = ndouble; 456 (*C)->info.fill_ratio_given = fill; 457 (*C)->info.fill_ratio_needed = afill; 458 459 #if defined(PETSC_USE_INFO) 460 if (ci[am]) { 461 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 462 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 463 } else { 464 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 465 } 466 #endif 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap" 472 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat *C) 473 { 474 PetscErrorCode ierr; 475 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 476 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 477 PetscInt *ci,*cj,*bb; 478 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 479 PetscReal afill; 480 PetscInt i,j,col,ndouble = 0; 481 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 482 PetscHeap h; 483 484 PetscFunctionBegin; 485 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 486 /*---------------------------------------------------------------------------------------------*/ 487 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 488 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 489 ci[0] = 0; 490 491 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 492 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 493 current_space = free_space; 494 495 ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 496 ierr = PetscMalloc(a->rmax*sizeof(PetscInt),&bb);CHKERRQ(ierr); 497 498 /* Determine ci and cj */ 499 for (i=0; i<am; i++) { 500 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 501 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 502 ci[i+1] = ci[i]; 503 /* Populate the min heap */ 504 for (j=0; j<anzi; j++) { 505 bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 506 if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 507 ierr = PetscHeapAdd(h,j,bj[bb[j]++]);CHKERRQ(ierr); 508 } 509 } 510 /* Pick off the min element, adding it to free space */ 511 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 512 while (j >= 0) { 513 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 514 ierr = PetscFreeSpaceGet(PetscMin(2*current_space->total_array_size,16 << 20),¤t_space);CHKERRQ(ierr); 515 ndouble++; 516 } 517 *(current_space->array++) = col; 518 current_space->local_used++; 519 current_space->local_remaining--; 520 ci[i+1]++; 521 522 /* stash if anything else remains in this row of B */ 523 if (bb[j] < bi[acol[j]+1]) {ierr = PetscHeapStash(h,j,bj[bb[j]++]);CHKERRQ(ierr);} 524 while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 525 PetscInt j2,col2; 526 ierr = PetscHeapPeek(h,&j2,&col2);CHKERRQ(ierr); 527 if (col2 != col) break; 528 ierr = PetscHeapPop(h,&j2,&col2);CHKERRQ(ierr); 529 if (bb[j2] < bi[acol[j2]+1]) {ierr = PetscHeapStash(h,j2,bj[bb[j2]++]);CHKERRQ(ierr);} 530 } 531 /* Put any stashed elements back into the min heap */ 532 ierr = PetscHeapUnstash(h);CHKERRQ(ierr); 533 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 534 } 535 } 536 ierr = PetscFree(bb);CHKERRQ(ierr); 537 ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 538 539 /* Column indices are in the list of free space */ 540 /* Allocate space for cj, initialize cj, and */ 541 /* destroy list of free space and other temporary array(s) */ 542 ierr = PetscMalloc(ci[am]*sizeof(PetscInt),&cj);CHKERRQ(ierr); 543 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 544 545 /* put together the new symbolic matrix */ 546 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,PETSC_NULL,C);CHKERRQ(ierr); 547 (*C)->rmap->bs = A->rmap->bs; 548 (*C)->cmap->bs = B->cmap->bs; 549 550 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 551 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 552 c = (Mat_SeqAIJ *)((*C)->data); 553 c->free_a = PETSC_TRUE; 554 c->free_ij = PETSC_TRUE; 555 c->nonew = 0; 556 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 557 558 /* set MatInfo */ 559 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 560 if (afill < 1.0) afill = 1.0; 561 c->maxnz = ci[am]; 562 c->nz = ci[am]; 563 (*C)->info.mallocs = ndouble; 564 (*C)->info.fill_ratio_given = fill; 565 (*C)->info.fill_ratio_needed = afill; 566 567 #if defined(PETSC_USE_INFO) 568 if (ci[am]) { 569 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 570 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 571 } else { 572 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 573 } 574 #endif 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap" 580 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat *C) 581 { 582 PetscErrorCode ierr; 583 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 584 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 585 PetscInt *ci,*cj,*bb; 586 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 587 PetscReal afill; 588 PetscInt i,j,col,ndouble = 0; 589 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 590 PetscHeap h; 591 PetscBT bt; 592 593 PetscFunctionBegin; 594 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 595 /*---------------------------------------------------------------------------------------------*/ 596 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 597 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 598 ci[0] = 0; 599 600 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 601 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 602 current_space = free_space; 603 604 ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 605 ierr = PetscMalloc(a->rmax*sizeof(PetscInt),&bb);CHKERRQ(ierr); 606 ierr = PetscBTCreate(bn,&bt);CHKERRQ(ierr); 607 608 /* Determine ci and cj */ 609 for (i=0; i<am; i++) { 610 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 611 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 612 const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 613 ci[i+1] = ci[i]; 614 /* Populate the min heap */ 615 for (j=0; j<anzi; j++) { 616 PetscInt brow = acol[j]; 617 for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 618 PetscInt bcol = bj[bb[j]]; 619 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 620 ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 621 bb[j]++; 622 break; 623 } 624 } 625 } 626 /* Pick off the min element, adding it to free space */ 627 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 628 while (j >= 0) { 629 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 630 fptr = PETSC_NULL; /* need PetscBTMemzero */ 631 ierr = PetscFreeSpaceGet(PetscMin(2*current_space->total_array_size,16 << 20),¤t_space);CHKERRQ(ierr); 632 ndouble++; 633 } 634 *(current_space->array++) = col; 635 current_space->local_used++; 636 current_space->local_remaining--; 637 ci[i+1]++; 638 639 /* stash if anything else remains in this row of B */ 640 for ( ; bb[j] < bi[acol[j]+1]; bb[j]++) { 641 PetscInt bcol = bj[bb[j]]; 642 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 643 ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 644 bb[j]++; 645 break; 646 } 647 } 648 ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 649 } 650 if (fptr) { /* Clear the bits for this row */ 651 for ( ; fptr<current_space->array; fptr++) {ierr = PetscBTClear(bt,*fptr);CHKERRQ(ierr);} 652 } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 653 ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 654 } 655 } 656 ierr = PetscFree(bb);CHKERRQ(ierr); 657 ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 658 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 659 660 /* Column indices are in the list of free space */ 661 /* Allocate space for cj, initialize cj, and */ 662 /* destroy list of free space and other temporary array(s) */ 663 ierr = PetscMalloc(ci[am]*sizeof(PetscInt),&cj);CHKERRQ(ierr); 664 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 665 666 /* put together the new symbolic matrix */ 667 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,PETSC_NULL,C);CHKERRQ(ierr); 668 (*C)->rmap->bs = A->rmap->bs; 669 (*C)->cmap->bs = B->cmap->bs; 670 671 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 672 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 673 c = (Mat_SeqAIJ *)((*C)->data); 674 c->free_a = PETSC_TRUE; 675 c->free_ij = PETSC_TRUE; 676 c->nonew = 0; 677 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 678 679 /* set MatInfo */ 680 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 681 if (afill < 1.0) afill = 1.0; 682 c->maxnz = ci[am]; 683 c->nz = ci[am]; 684 (*C)->info.mallocs = ndouble; 685 (*C)->info.fill_ratio_given = fill; 686 (*C)->info.fill_ratio_needed = afill; 687 688 #if defined(PETSC_USE_INFO) 689 if (ci[am]) { 690 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 691 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 692 } else { 693 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 694 } 695 #endif 696 PetscFunctionReturn(0); 697 } 698 699 /* This routine is not used. Should be removed! */ 700 #undef __FUNCT__ 701 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 702 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 703 { 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 if (scall == MAT_INITIAL_MATRIX){ 708 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 709 } 710 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 716 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 717 { 718 PetscErrorCode ierr; 719 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 720 721 PetscFunctionBegin; 722 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 723 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 724 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 725 ierr = PetscFree(multtrans);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 731 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 732 { 733 PetscErrorCode ierr; 734 PetscContainer container; 735 Mat_MatMatTransMult *multtrans=PETSC_NULL; 736 737 PetscFunctionBegin; 738 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 739 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 740 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 741 A->ops->destroy = multtrans->destroy; 742 if (A->ops->destroy) { 743 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 744 } 745 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 746 PetscFunctionReturn(0); 747 } 748 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 751 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 752 { 753 PetscErrorCode ierr; 754 Mat Bt; 755 PetscInt *bti,*btj; 756 Mat_MatMatTransMult *multtrans; 757 PetscContainer container; 758 PetscLogDouble t0,tf,etime2=0.0; 759 760 PetscFunctionBegin; 761 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 762 /* create symbolic Bt */ 763 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 764 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 765 Bt->rmap->bs = A->cmap->bs; 766 Bt->cmap->bs = B->cmap->bs; 767 768 /* get symbolic C=A*Bt */ 769 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 770 771 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 772 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 773 774 /* attach the supporting struct to C */ 775 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 776 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 777 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 778 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 779 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 780 781 multtrans->usecoloring = PETSC_FALSE; 782 multtrans->destroy = (*C)->ops->destroy; 783 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 784 785 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 786 etime2 += tf - t0; 787 788 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 789 if (multtrans->usecoloring){ 790 /* Create MatTransposeColoring from symbolic C=A*B^T */ 791 MatTransposeColoring matcoloring; 792 ISColoring iscoloring; 793 Mat Bt_dense,C_dense; 794 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 795 796 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 797 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 798 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 799 etime0 += tf - t0; 800 801 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 802 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 803 multtrans->matcoloring = matcoloring; 804 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 805 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 806 etime01 += tf - t0; 807 808 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 809 /* Create Bt_dense and C_dense = A*Bt_dense */ 810 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 811 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 812 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 813 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 814 Bt_dense->assembled = PETSC_TRUE; 815 multtrans->Bt_den = Bt_dense; 816 817 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 818 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 819 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 820 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 821 Bt_dense->assembled = PETSC_TRUE; 822 multtrans->ABt_den = C_dense; 823 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 824 etime1 += tf - t0; 825 826 #if defined(PETSC_USE_INFO) 827 { 828 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 829 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 830 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 831 } 832 #endif 833 } 834 /* clean up */ 835 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 836 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 837 838 839 840 #if defined(INEFFICIENT_ALGORITHM) 841 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 842 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 843 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 844 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 845 PetscInt am=A->rmap->N,bm=B->rmap->N; 846 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 847 MatScalar *ca; 848 PetscBT lnkbt; 849 PetscReal afill; 850 851 /* Allocate row pointer array ci */ 852 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 853 ci[0] = 0; 854 855 /* Create and initialize a linked list for C columns */ 856 nlnk = bm+1; 857 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 858 859 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 860 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 861 current_space = free_space; 862 863 /* Determine symbolic info for each row of the product A*B^T: */ 864 for (i=0; i<am; i++) { 865 anzi = ai[i+1] - ai[i]; 866 cnzi = 0; 867 acol = aj + ai[i]; 868 for (j=0; j<bm; j++){ 869 bnzj = bi[j+1] - bi[j]; 870 bcol= bj + bi[j]; 871 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 872 ka = 0; kb = 0; 873 while (ka < anzi && kb < bnzj){ 874 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 875 if (ka == anzi) break; 876 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 877 if (kb == bnzj) break; 878 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 879 index[0] = j; 880 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 881 cnzi++; 882 break; 883 } 884 } 885 } 886 887 /* If free space is not available, make more free space */ 888 /* Double the amount of total space in the list */ 889 if (current_space->local_remaining<cnzi) { 890 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 891 nspacedouble++; 892 } 893 894 /* Copy data into free space, then initialize lnk */ 895 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 896 current_space->array += cnzi; 897 current_space->local_used += cnzi; 898 current_space->local_remaining -= cnzi; 899 900 ci[i+1] = ci[i] + cnzi; 901 } 902 903 904 /* Column indices are in the list of free space. 905 Allocate array cj, copy column indices to cj, and destroy list of free space */ 906 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 907 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 908 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 909 910 /* Allocate space for ca */ 911 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 912 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 913 914 /* put together the new symbolic matrix */ 915 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 916 (*C)->rmap->bs = A->cmap->bs; 917 (*C)->cmap->bs = B->cmap->bs; 918 919 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 920 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 921 c = (Mat_SeqAIJ *)((*C)->data); 922 c->free_a = PETSC_TRUE; 923 c->free_ij = PETSC_TRUE; 924 c->nonew = 0; 925 926 /* set MatInfo */ 927 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 928 if (afill < 1.0) afill = 1.0; 929 c->maxnz = ci[am]; 930 c->nz = ci[am]; 931 (*C)->info.mallocs = nspacedouble; 932 (*C)->info.fill_ratio_given = fill; 933 (*C)->info.fill_ratio_needed = afill; 934 935 #if defined(PETSC_USE_INFO) 936 if (ci[am]) { 937 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 938 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 939 } else { 940 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 941 } 942 #endif 943 #endif 944 PetscFunctionReturn(0); 945 } 946 947 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 950 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 951 { 952 PetscErrorCode ierr; 953 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 954 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 955 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 956 PetscLogDouble flops=0.0; 957 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca,*cval; 958 Mat_MatMatTransMult *multtrans; 959 PetscContainer container; 960 #if defined(USE_ARRAY) 961 MatScalar *spdot; 962 #endif 963 964 PetscFunctionBegin; 965 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 966 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 967 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 968 if (multtrans->usecoloring){ 969 MatTransposeColoring matcoloring = multtrans->matcoloring; 970 Mat Bt_dense; 971 PetscInt m,n; 972 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 973 Mat C_dense = multtrans->ABt_den; 974 975 Bt_dense = multtrans->Bt_den; 976 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 977 978 /* Get Bt_dense by Apply MatTransposeColoring to B */ 979 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 980 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 981 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 982 etime0 += tf - t0; 983 984 /* C_dense = A*Bt_dense */ 985 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 986 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 987 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 988 etime2 += tf - t0; 989 990 /* Recover C from C_dense */ 991 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 992 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 993 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 994 etime1 += tf - t0; 995 #if defined(PETSC_USE_INFO) 996 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 997 #endif 998 PetscFunctionReturn(0); 999 } 1000 1001 #if defined(USE_ARRAY) 1002 /* allocate an array for implementing sparse inner-product */ 1003 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 1004 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 1005 #endif 1006 1007 /* clear old values in C */ 1008 if (!c->a){ 1009 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1010 c->a = ca; 1011 c->free_a = PETSC_TRUE; 1012 } else { 1013 ca = c->a; 1014 } 1015 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1016 1017 for (i=0; i<cm; i++) { 1018 anzi = ai[i+1] - ai[i]; 1019 acol = aj + ai[i]; 1020 aval = aa + ai[i]; 1021 cnzi = ci[i+1] - ci[i]; 1022 ccol = cj + ci[i]; 1023 cval = ca + ci[i]; 1024 for (j=0; j<cnzi; j++){ 1025 brow = ccol[j]; 1026 bnzj = bi[brow+1] - bi[brow]; 1027 bcol = bj + bi[brow]; 1028 bval = ba + bi[brow]; 1029 1030 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 1031 #if defined(USE_ARRAY) 1032 /* put ba to spdot array */ 1033 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 1034 /* c(i,j)=A[i,:]*B[j,:]^T */ 1035 for (nexta=0; nexta<anzi; nexta++){ 1036 cval[j] += spdot[acol[nexta]]*aval[nexta]; 1037 } 1038 /* zero spdot array */ 1039 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 1040 #else 1041 nexta = 0; nextb = 0; 1042 while (nexta<anzi && nextb<bnzj){ 1043 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 1044 if (nexta == anzi) break; 1045 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 1046 if (nextb == bnzj) break; 1047 if (acol[nexta] == bcol[nextb]){ 1048 cval[j] += aval[nexta]*bval[nextb]; 1049 nexta++; nextb++; 1050 flops += 2; 1051 } 1052 } 1053 #endif 1054 } 1055 } 1056 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1057 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1058 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1059 #if defined(USE_ARRAY) 1060 ierr = PetscFree(spdot); 1061 #endif 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 1067 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 1068 PetscErrorCode ierr; 1069 1070 PetscFunctionBegin; 1071 if (scall == MAT_INITIAL_MATRIX){ 1072 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 1073 } 1074 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 1080 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 1081 { 1082 PetscErrorCode ierr; 1083 Mat At; 1084 PetscInt *ati,*atj; 1085 1086 PetscFunctionBegin; 1087 /* create symbolic At */ 1088 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1089 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 1090 At->rmap->bs = A->cmap->bs; 1091 At->cmap->bs = B->cmap->bs; 1092 1093 /* get symbolic C=At*B */ 1094 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 1095 1096 /* clean up */ 1097 ierr = MatDestroy(&At);CHKERRQ(ierr); 1098 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 1104 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1105 { 1106 PetscErrorCode ierr; 1107 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1108 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1109 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1110 PetscLogDouble flops=0.0; 1111 MatScalar *aa=a->a,*ba,*ca,*caj; 1112 1113 PetscFunctionBegin; 1114 if (!c->a){ 1115 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1116 c->a = ca; 1117 c->free_a = PETSC_TRUE; 1118 } else { 1119 ca = c->a; 1120 } 1121 /* clear old values in C */ 1122 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1123 1124 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1125 for (i=0;i<am;i++) { 1126 bj = b->j + bi[i]; 1127 ba = b->a + bi[i]; 1128 bnzi = bi[i+1] - bi[i]; 1129 anzi = ai[i+1] - ai[i]; 1130 for (j=0; j<anzi; j++) { 1131 nextb = 0; 1132 crow = *aj++; 1133 cjj = cj + ci[crow]; 1134 caj = ca + ci[crow]; 1135 /* perform sparse axpy operation. Note cjj includes bj. */ 1136 for (k=0; nextb<bnzi; k++) { 1137 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 1138 caj[k] += (*aa)*(*(ba+nextb)); 1139 nextb++; 1140 } 1141 } 1142 flops += 2*bnzi; 1143 aa++; 1144 } 1145 } 1146 1147 /* Assemble the final matrix and clean up */ 1148 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1149 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1150 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 1157 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1158 { 1159 PetscErrorCode ierr; 1160 1161 PetscFunctionBegin; 1162 if (scall == MAT_INITIAL_MATRIX){ 1163 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 1164 } 1165 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 1166 PetscFunctionReturn(0); 1167 } 1168 EXTERN_C_END 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 1172 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 1173 { 1174 PetscErrorCode ierr; 1175 1176 PetscFunctionBegin; 1177 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 1178 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 1184 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1185 { 1186 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1187 PetscErrorCode ierr; 1188 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1189 MatScalar *aa; 1190 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 1191 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 1192 1193 PetscFunctionBegin; 1194 if (!cm || !cn) PetscFunctionReturn(0); 1195 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 1196 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1197 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1198 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1199 ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr); 1200 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1201 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1202 colam = col*am; 1203 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1204 r1 = r2 = r3 = r4 = 0.0; 1205 n = a->i[i+1] - a->i[i]; 1206 aj = a->j + a->i[i]; 1207 aa = a->a + a->i[i]; 1208 for (j=0; j<n; j++) { 1209 r1 += (*aa)*b1[*aj]; 1210 r2 += (*aa)*b2[*aj]; 1211 r3 += (*aa)*b3[*aj]; 1212 r4 += (*aa++)*b4[*aj++]; 1213 } 1214 c[colam + i] = r1; 1215 c[colam + am + i] = r2; 1216 c[colam + am2 + i] = r3; 1217 c[colam + am3 + i] = r4; 1218 } 1219 b1 += bm4; 1220 b2 += bm4; 1221 b3 += bm4; 1222 b4 += bm4; 1223 } 1224 for (;col<cn; col++){ /* over extra columns of C */ 1225 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1226 r1 = 0.0; 1227 n = a->i[i+1] - a->i[i]; 1228 aj = a->j + a->i[i]; 1229 aa = a->a + a->i[i]; 1230 1231 for (j=0; j<n; j++) { 1232 r1 += (*aa++)*b1[*aj++]; 1233 } 1234 c[col*am + i] = r1; 1235 } 1236 b1 += bm; 1237 } 1238 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1239 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1240 ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr); 1241 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1242 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 /* 1247 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 1248 */ 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 1251 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1252 { 1253 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1254 PetscErrorCode ierr; 1255 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1256 MatScalar *aa; 1257 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 1258 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 1259 1260 PetscFunctionBegin; 1261 if (!cm || !cn) PetscFunctionReturn(0); 1262 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1263 ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr); 1264 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1265 1266 if (a->compressedrow.use){ /* use compressed row format */ 1267 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1268 colam = col*am; 1269 arm = a->compressedrow.nrows; 1270 ii = a->compressedrow.i; 1271 ridx = a->compressedrow.rindex; 1272 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1273 r1 = r2 = r3 = r4 = 0.0; 1274 n = ii[i+1] - ii[i]; 1275 aj = a->j + ii[i]; 1276 aa = a->a + ii[i]; 1277 for (j=0; j<n; j++) { 1278 r1 += (*aa)*b1[*aj]; 1279 r2 += (*aa)*b2[*aj]; 1280 r3 += (*aa)*b3[*aj]; 1281 r4 += (*aa++)*b4[*aj++]; 1282 } 1283 c[colam + ridx[i]] += r1; 1284 c[colam + am + ridx[i]] += r2; 1285 c[colam + am2 + ridx[i]] += r3; 1286 c[colam + am3 + ridx[i]] += r4; 1287 } 1288 b1 += bm4; 1289 b2 += bm4; 1290 b3 += bm4; 1291 b4 += bm4; 1292 } 1293 for (;col<cn; col++){ /* over extra columns of C */ 1294 colam = col*am; 1295 arm = a->compressedrow.nrows; 1296 ii = a->compressedrow.i; 1297 ridx = a->compressedrow.rindex; 1298 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1299 r1 = 0.0; 1300 n = ii[i+1] - ii[i]; 1301 aj = a->j + ii[i]; 1302 aa = a->a + ii[i]; 1303 1304 for (j=0; j<n; j++) { 1305 r1 += (*aa++)*b1[*aj++]; 1306 } 1307 c[col*am + ridx[i]] += r1; 1308 } 1309 b1 += bm; 1310 } 1311 } else { 1312 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1313 colam = col*am; 1314 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1315 r1 = r2 = r3 = r4 = 0.0; 1316 n = a->i[i+1] - a->i[i]; 1317 aj = a->j + a->i[i]; 1318 aa = a->a + a->i[i]; 1319 for (j=0; j<n; j++) { 1320 r1 += (*aa)*b1[*aj]; 1321 r2 += (*aa)*b2[*aj]; 1322 r3 += (*aa)*b3[*aj]; 1323 r4 += (*aa++)*b4[*aj++]; 1324 } 1325 c[colam + i] += r1; 1326 c[colam + am + i] += r2; 1327 c[colam + am2 + i] += r3; 1328 c[colam + am3 + i] += r4; 1329 } 1330 b1 += bm4; 1331 b2 += bm4; 1332 b3 += bm4; 1333 b4 += bm4; 1334 } 1335 for (;col<cn; col++){ /* over extra columns of C */ 1336 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1337 r1 = 0.0; 1338 n = a->i[i+1] - a->i[i]; 1339 aj = a->j + a->i[i]; 1340 aa = a->a + a->i[i]; 1341 1342 for (j=0; j<n; j++) { 1343 r1 += (*aa++)*b1[*aj++]; 1344 } 1345 c[col*am + i] += r1; 1346 } 1347 b1 += bm; 1348 } 1349 } 1350 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1351 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1352 ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1358 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1359 { 1360 PetscErrorCode ierr; 1361 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1362 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1363 PetscInt *bi=b->i,*bj=b->j; 1364 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1365 MatScalar *btval,*btval_den,*ba=b->a; 1366 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1367 1368 PetscFunctionBegin; 1369 btval_den=btdense->v; 1370 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1371 for (k=0; k<ncolors; k++) { 1372 ncolumns = coloring->ncolumns[k]; 1373 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1374 col = *(columns + colorforcol[k] + l); 1375 btcol = bj + bi[col]; 1376 btval = ba + bi[col]; 1377 anz = bi[col+1] - bi[col]; 1378 for (j=0; j<anz; j++){ 1379 brow = btcol[j]; 1380 btval_den[brow] = btval[j]; 1381 } 1382 } 1383 btval_den += m; 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1390 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1391 { 1392 PetscErrorCode ierr; 1393 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1394 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1395 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1396 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1397 1398 PetscFunctionBegin; 1399 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1400 ierr = MatDenseGetArray(Cden,&ca_den);CHKERRQ(ierr); 1401 cp_den = ca_den; 1402 for (k=0; k<ncolors; k++) { 1403 nrows = matcoloring->nrows[k]; 1404 row = rows + colorforrow[k]; 1405 idx = spidx + colorforrow[k]; 1406 for (l=0; l<nrows; l++){ 1407 ca[idx[l]] = cp_den[row[l]]; 1408 } 1409 cp_den += m; 1410 } 1411 ierr = MatDenseRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 /* 1416 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1417 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1418 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1419 */ 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1422 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1423 { 1424 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1425 PetscErrorCode ierr; 1426 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1427 PetscInt nz = a->i[m],row,*jj,mr,col; 1428 PetscInt *cspidx; 1429 1430 PetscFunctionBegin; 1431 *nn = n; 1432 if (!ia) PetscFunctionReturn(0); 1433 if (symmetric) { 1434 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1435 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,(PetscInt**)ia,(PetscInt**)ja);CHKERRQ(ierr); 1436 } else { 1437 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1438 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1439 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1440 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1441 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1442 jj = a->j; 1443 for (i=0; i<nz; i++) { 1444 collengths[jj[i]]++; 1445 } 1446 cia[0] = oshift; 1447 for (i=0; i<n; i++) { 1448 cia[i+1] = cia[i] + collengths[i]; 1449 } 1450 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1451 jj = a->j; 1452 for (row=0; row<m; row++) { 1453 mr = a->i[row+1] - a->i[row]; 1454 for (i=0; i<mr; i++) { 1455 col = *jj++; 1456 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1457 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1458 } 1459 } 1460 ierr = PetscFree(collengths);CHKERRQ(ierr); 1461 *ia = cia; *ja = cja; 1462 *spidx = cspidx; 1463 } 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1469 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1470 { 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 if (!ia) PetscFunctionReturn(0); 1475 1476 ierr = PetscFree(*ia);CHKERRQ(ierr); 1477 ierr = PetscFree(*ja);CHKERRQ(ierr); 1478 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1484 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1485 { 1486 PetscErrorCode ierr; 1487 PetscInt i,n,nrows,N,j,k,m,ncols,col,cm; 1488 const PetscInt *is,*ci,*cj,*row_idx; 1489 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1490 IS *isa; 1491 PetscBool flg1,flg2; 1492 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1493 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1494 PetscInt *colorforcol,*columns,*columns_i; 1495 1496 PetscFunctionBegin; 1497 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1498 1499 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1500 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1501 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1502 if (flg1 || flg2) { 1503 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1504 } 1505 1506 N = mat->cmap->N/bs; 1507 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1508 c->N = mat->cmap->N/bs; 1509 c->m = mat->rmap->N/bs; 1510 c->rstart = 0; 1511 1512 c->ncolors = nis; 1513 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1514 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1515 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1516 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1517 colorforrow[0] = 0; 1518 rows_i = rows; 1519 columnsforspidx_i = columnsforspidx; 1520 1521 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1522 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1523 colorforcol[0] = 0; 1524 columns_i = columns; 1525 1526 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,PETSC_NULL);CHKERRQ(ierr); /* column-wise storage of mat */ 1527 1528 cm = c->m; 1529 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1530 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1531 for (i=0; i<nis; i++) { 1532 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1533 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1534 c->ncolumns[i] = n; 1535 if (n) { 1536 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1537 } 1538 colorforcol[i+1] = colorforcol[i] + n; 1539 columns_i += n; 1540 1541 /* fast, crude version requires O(N*N) work */ 1542 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1543 1544 /* loop over columns*/ 1545 for (j=0; j<n; j++) { 1546 col = is[j]; 1547 row_idx = cj + ci[col]; 1548 m = ci[col+1] - ci[col]; 1549 /* loop over columns marking them in rowhit */ 1550 for (k=0; k<m; k++) { 1551 idxhit[*row_idx] = spidx[ci[col] + k]; 1552 rowhit[*row_idx++] = col + 1; 1553 } 1554 } 1555 /* count the number of hits */ 1556 nrows = 0; 1557 for (j=0; j<cm; j++) { 1558 if (rowhit[j]) nrows++; 1559 } 1560 c->nrows[i] = nrows; 1561 colorforrow[i+1] = colorforrow[i] + nrows; 1562 1563 nrows = 0; 1564 for (j=0; j<cm; j++) { 1565 if (rowhit[j]) { 1566 rows_i[nrows] = j; 1567 columnsforspidx_i[nrows] = idxhit[j]; 1568 nrows++; 1569 } 1570 } 1571 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1572 rows_i += nrows; columnsforspidx_i += nrows; 1573 } 1574 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,PETSC_NULL);CHKERRQ(ierr); 1575 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1576 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1577 #if defined(PETSC_USE_DEBUG) 1578 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1579 #endif 1580 1581 c->colorforrow = colorforrow; 1582 c->rows = rows; 1583 c->columnsforspidx = columnsforspidx; 1584 c->colorforcol = colorforcol; 1585 c->columns = columns; 1586 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1587 PetscFunctionReturn(0); 1588 } 1589