1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 /* 12 #define DEBUG_MATMATMULT 13 */ 14 EXTERN_C_BEGIN 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 18 { 19 PetscErrorCode ierr; 20 PetscBool scalable=PETSC_FALSE,scalable_new=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 if (scall == MAT_INITIAL_MATRIX){ 24 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 25 ierr = PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,PETSC_NULL);CHKERRQ(ierr); 26 ierr = PetscOptionsBool("-matmatmult_scalable_new","Use a scalable but slower C=A*B","",scalable_new,&scalable_new,PETSC_NULL);CHKERRQ(ierr); 27 ierr = PetscOptionsEnd();CHKERRQ(ierr); 28 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 29 if (scalable_new){ 30 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,C);CHKERRQ(ierr); 31 } else if (scalable){ 32 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 33 } else { 34 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 35 } 36 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 37 } 38 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 39 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 40 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 EXTERN_C_END 44 45 /* 46 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B 47 Input Parameter: 48 . am, Ai, Aj - number of rows and structure of A 49 . bm, bn, Bi, Bj - number of rows, columns, and structure of B 50 . fill - filll ratio See MatMatMult() 51 52 Output Parameter: 53 . Ci, Cj - structure of C = A*B 54 . nspacedouble - number of extra mallocs 55 */ 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ" 58 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 59 { 60 PetscErrorCode ierr; 61 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 62 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 63 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 64 const PetscInt *aj=Aj,*bi=Bi,*bj=Bj,*bjj; 65 PetscInt *ci,*cj,nlnk_max; 66 PetscInt i,j,anzi,brow,bnzj,cnzi,*lnk,ndouble=0; 67 PetscBT lnkbt; 68 69 PetscFunctionBegin; 70 /* Allocate ci array, arrays for fill computation and */ 71 /* free space for accumulating nonzero column info */ 72 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 73 ci[0] = 0; 74 75 /* create and initialize a linked list */ 76 nlnk_max = a->rmax*b->rmax; 77 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; 78 ierr = PetscLLCondensedCreate(nlnk_max,bn,&lnk,&lnkbt);CHKERRQ(ierr); 79 80 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 81 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 82 current_space = free_space; 83 84 /* Determine ci and cj */ 85 for (i=0; i<am; i++) { 86 anzi = Ai[i+1] - Ai[i]; 87 aj = Aj + Ai[i]; 88 for (j=0; j<anzi; j++){ 89 brow = aj[j]; 90 bnzj = bi[brow+1] - bi[brow]; 91 bjj = bj + bi[brow]; 92 /* add non-zero cols of B into the sorted linked list lnk */ 93 ierr = PetscLLCondensedAddSorted(bnzj,bjj,lnk,lnkbt);CHKERRQ(ierr); 94 } 95 cnzi = lnk[0]; 96 97 /* If free space is not available, make more free space */ 98 /* Double the amount of total space in the list */ 99 if (current_space->local_remaining<cnzi) { 100 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 101 ndouble++; 102 } 103 104 /* Copy data into free space, then initialize lnk */ 105 ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr); 106 current_space->array += cnzi; 107 current_space->local_used += cnzi; 108 current_space->local_remaining -= cnzi; 109 ci[i+1] = ci[i] + cnzi; 110 } 111 112 /* Column indices are in the list of free space */ 113 /* Allocate space for cj, initialize cj, and */ 114 /* destroy list of free space and other temporary array(s) */ 115 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 116 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 117 ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr); 118 119 *Ci = ci; 120 *Cj = cj; 121 *nspacedouble = ndouble; 122 PetscFunctionReturn(0); 123 } 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable" 127 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 128 { 129 PetscErrorCode ierr; 130 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 131 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 132 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 133 const PetscInt *aj=Aj,*bi=Bi,*bj; 134 PetscInt *ci,*cj; 135 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 136 PetscInt nlnk_max,*lnk; 137 138 PetscFunctionBegin; 139 /* Allocate ci array, arrays for fill computation and */ 140 /* free space for accumulating nonzero column info */ 141 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 142 ci[0] = 0; 143 144 /* create and initialize a condensed linked list */ 145 crmax = a->rmax*b->rmax; 146 nlnk_max = PetscMin(crmax,bn); 147 if (!nlnk_max && bn) { 148 nlnk_max = bn; /* in case rmax is not defined for A or B */ 149 #if defined(PETSC_USE_DEBUGGING) 150 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 151 #endif 152 } 153 #if defined(DEBUG_MATMATMULT) 154 ierr = (PETSC_COMM_SELF,"LLCondensedCreate nlnk_max=%d, bn %d, crmax %d\n",nlnk_max,bn,crmax); 155 #endif 156 157 ierr = PetscLLCondensedCreate_Scalable(nlnk_max,&lnk);CHKERRQ(ierr); 158 159 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 160 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 161 current_space = free_space; 162 163 /* Determine ci and cj */ 164 for (i=0; i<am; i++) { 165 anzi = Ai[i+1] - Ai[i]; 166 cnzi = 0; 167 aj = Aj + Ai[i]; 168 for (j=0; j<anzi; j++){ 169 brow = aj[j]; 170 bnzj = bi[brow+1] - bi[brow]; 171 bj = Bj + bi[brow]; 172 /* add non-zero cols of B into the condensed sorted linked list lnk */ 173 ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr); 174 } 175 cnzi = lnk[0]; 176 ci[i+1] = ci[i] + cnzi; 177 178 /* If free space is not available, make more free space */ 179 /* Double the amount of total space in the list */ 180 if (current_space->local_remaining<cnzi) { 181 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 182 ndouble++; 183 } 184 185 /* Copy data into free space, then initialize lnk */ 186 ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr); 187 current_space->array += cnzi; 188 current_space->local_used += cnzi; 189 current_space->local_remaining -= cnzi; 190 } 191 192 /* Column indices are in the list of free space */ 193 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 194 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 195 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 196 ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr); 197 198 *Ci = ci; 199 *Cj = cj; 200 *nspacedouble = ndouble; 201 PetscFunctionReturn(0); 202 } 203 204 /* 205 Does not use bitarray 206 */ 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new" 209 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 210 { 211 PetscErrorCode ierr; 212 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 213 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 214 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 215 const PetscInt *aj=Aj,*bi=Bi,*bj; 216 PetscInt *ci,*cj; 217 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 218 PetscInt lnk_max=bn,*lnk; 219 220 PetscFunctionBegin; 221 /* Allocate ci array, arrays for fill computation and */ 222 /* free space for accumulating nonzero column info */ 223 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 224 ci[0] = 0; 225 226 /* create and initialize a condensed linked list */ 227 crmax = a->rmax*b->rmax; 228 lnk_max = PetscMin(crmax,bn); 229 if (!lnk_max && bn) { 230 lnk_max = bn; /* in case rmax is not defined for A or B */ 231 #if defined(PETSC_USE_DEBUGGING) 232 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 233 #endif 234 } 235 #if defined(DEBUG_MATMATMULT) 236 ierr = (PETSC_COMM_SELF,"LLCondensedCreate lnk_max=%d, bn %d, crmax %d\n",lnk_max,bn,crmax); 237 #endif 238 ierr = PetscLLCondensedCreate_fast(lnk_max,&lnk);CHKERRQ(ierr); 239 240 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 241 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 242 current_space = free_space; 243 244 /* Determine ci and cj */ 245 for (i=0; i<am; i++) { 246 anzi = Ai[i+1] - Ai[i]; 247 cnzi = 0; 248 aj = Aj + Ai[i]; 249 for (j=0; j<anzi; j++){ 250 brow = aj[j]; 251 bnzj = bi[brow+1] - bi[brow]; 252 bj = Bj + bi[brow]; 253 /* add non-zero cols of B into the condensed sorted linked list lnk */ 254 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 255 ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr); 256 } 257 cnzi = lnk[1]; 258 ci[i+1] = ci[i] + cnzi; 259 260 /* If free space is not available, make more free space */ 261 /* Double the amount of total space in the list */ 262 if (current_space->local_remaining<cnzi) { 263 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 264 ndouble++; 265 } 266 267 /* Copy data into free space, then initialize lnk */ 268 /* ierr = PetscLLCondensedView_fast(lnk);CHKERRQ(ierr); */ 269 ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr); 270 /* PetscIntView(cnzi,current_space->array,0); */ 271 current_space->array += cnzi; 272 current_space->local_used += cnzi; 273 current_space->local_remaining -= cnzi; 274 } 275 276 /* Column indices are in the list of free space */ 277 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 278 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 279 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 280 ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr); 281 282 *Ci = ci; 283 *Cj = cj; 284 *nspacedouble = ndouble; 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 290 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 291 { 292 PetscErrorCode ierr; 293 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 294 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 295 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 296 PetscReal afill; 297 298 PetscFunctionBegin; 299 /* printf("MatMatMultSymbolic_SeqAIJ_SeqAIJ...\n"); */ 300 /* Get ci and cj */ 301 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 302 303 /* put together the new symbolic matrix */ 304 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,PETSC_NULL,C);CHKERRQ(ierr); 305 306 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 307 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 308 c = (Mat_SeqAIJ *)((*C)->data); 309 c->free_a = PETSC_FALSE; 310 c->free_ij = PETSC_TRUE; 311 c->nonew = 0; 312 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */ 313 314 /* set MatInfo */ 315 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 316 if (afill < 1.0) afill = 1.0; 317 c->maxnz = ci[am]; 318 c->nz = ci[am]; 319 (*C)->info.mallocs = nspacedouble; 320 (*C)->info.fill_ratio_given = fill; 321 (*C)->info.fill_ratio_needed = afill; 322 323 #if defined(PETSC_USE_INFO) 324 if (ci[am]) { 325 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 326 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 327 } else { 328 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 329 } 330 #endif 331 PetscFunctionReturn(0); 332 } 333 334 #undef __FUNCT__ 335 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 336 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 337 { 338 PetscErrorCode ierr; 339 PetscLogDouble flops=0.0; 340 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 341 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 342 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 343 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 344 PetscInt am=A->rmap->n,cm=C->rmap->n; 345 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 346 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp; 347 PetscScalar *ab_dense; 348 349 PetscFunctionBegin; 350 /* printf("MatMatMultNumeric_SeqAIJ_SeqAIJ...ca %p\n",c->a); */ 351 if (!c->a){ /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 352 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 353 c->a = ca; 354 c->free_a = PETSC_TRUE; 355 356 ierr = PetscMalloc(B->cmap->N*sizeof(PetscScalar),&ab_dense);CHKERRQ(ierr); 357 ierr = PetscMemzero(ab_dense,B->cmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 358 c->matmult_abdense = ab_dense; 359 } else { 360 ca = c->a; 361 ab_dense = c->matmult_abdense; 362 } 363 364 /* clean old values in C */ 365 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 366 /* Traverse A row-wise. */ 367 /* Build the ith row in C by summing over nonzero columns in A, */ 368 /* the rows of B corresponding to nonzeros of A. */ 369 for (i=0; i<am; i++) { 370 anzi = ai[i+1] - ai[i]; 371 for (j=0; j<anzi; j++) { 372 brow = aj[j]; 373 bnzi = bi[brow+1] - bi[brow]; 374 bjj = bj + bi[brow]; 375 baj = ba + bi[brow]; 376 /* perform dense axpy */ 377 valtmp = aa[j]; 378 for (k=0; k<bnzi; k++) { 379 ab_dense[bjj[k]] += valtmp*baj[k]; 380 } 381 flops += 2*bnzi; 382 } 383 aj += anzi; aa += anzi; 384 385 cnzi = ci[i+1] - ci[i]; 386 for (k=0; k<cnzi; k++) { 387 ca[k] += ab_dense[cj[k]]; 388 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 389 } 390 flops += cnzi; 391 cj += cnzi; ca += cnzi; 392 } 393 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 396 PetscFunctionReturn(0); 397 } 398 399 #undef __FUNCT__ 400 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable" 401 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 402 { 403 PetscErrorCode ierr; 404 PetscLogDouble flops=0.0; 405 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 406 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 407 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 408 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 409 PetscInt am=A->rmap->N,cm=C->rmap->N; 410 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 411 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 412 PetscInt nextb; 413 414 PetscFunctionBegin; 415 #if defined(DEBUG_MATMATMULT) 416 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable...\n");CHKERRQ(ierr); 417 #endif 418 /* clean old values in C */ 419 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 420 /* Traverse A row-wise. */ 421 /* Build the ith row in C by summing over nonzero columns in A, */ 422 /* the rows of B corresponding to nonzeros of A. */ 423 for (i=0;i<am;i++) { 424 anzi = ai[i+1] - ai[i]; 425 cnzi = ci[i+1] - ci[i]; 426 for (j=0;j<anzi;j++) { 427 brow = aj[j]; 428 bnzi = bi[brow+1] - bi[brow]; 429 bjj = bj + bi[brow]; 430 baj = ba + bi[brow]; 431 /* perform sparse axpy */ 432 valtmp = aa[j]; 433 nextb = 0; 434 for (k=0; nextb<bnzi; k++) { 435 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 436 ca[k] += valtmp*baj[nextb++]; 437 } 438 } 439 flops += 2*bnzi; 440 } 441 aj += anzi; aa += anzi; 442 cj += cnzi; ca += cnzi; 443 } 444 445 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 446 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 447 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new" 453 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,Mat *C) 454 { 455 PetscErrorCode ierr; 456 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 457 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 458 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 459 MatScalar *ca; 460 PetscReal afill; 461 462 PetscFunctionBegin; 463 #if defined(DEBUG_MATMATMULT) 464 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 465 #endif 466 /* Get ci and cj */ 467 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 468 469 /* Allocate space for ca */ 470 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 471 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 472 473 /* put together the new symbolic matrix */ 474 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 475 476 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 477 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 478 c = (Mat_SeqAIJ *)((*C)->data); 479 c->free_a = PETSC_TRUE; 480 c->free_ij = PETSC_TRUE; 481 c->nonew = 0; 482 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 483 484 /* set MatInfo */ 485 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 486 if (afill < 1.0) afill = 1.0; 487 c->maxnz = ci[am]; 488 c->nz = ci[am]; 489 (*C)->info.mallocs = nspacedouble; 490 (*C)->info.fill_ratio_given = fill; 491 (*C)->info.fill_ratio_needed = afill; 492 493 #if defined(PETSC_USE_INFO) 494 if (ci[am]) { 495 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 496 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 497 } else { 498 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 499 } 500 #endif 501 PetscFunctionReturn(0); 502 } 503 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable" 507 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) 508 { 509 PetscErrorCode ierr; 510 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 511 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 512 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 513 MatScalar *ca; 514 PetscReal afill; 515 516 PetscFunctionBegin; 517 /* Get ci and cj */ 518 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 519 520 /* Allocate space for ca */ 521 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 522 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 523 524 /* put together the new symbolic matrix */ 525 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 526 527 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 528 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 529 c = (Mat_SeqAIJ *)((*C)->data); 530 c->free_a = PETSC_TRUE; 531 c->free_ij = PETSC_TRUE; 532 c->nonew = 0; 533 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 534 535 /* set MatInfo */ 536 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 537 if (afill < 1.0) afill = 1.0; 538 c->maxnz = ci[am]; 539 c->nz = ci[am]; 540 (*C)->info.mallocs = nspacedouble; 541 (*C)->info.fill_ratio_given = fill; 542 (*C)->info.fill_ratio_needed = afill; 543 544 #if defined(PETSC_USE_INFO) 545 if (ci[am]) { 546 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 547 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 548 } else { 549 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 550 } 551 #endif 552 PetscFunctionReturn(0); 553 } 554 555 556 /* This routine is not used. Should be removed! */ 557 #undef __FUNCT__ 558 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 559 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 560 { 561 PetscErrorCode ierr; 562 563 PetscFunctionBegin; 564 if (scall == MAT_INITIAL_MATRIX){ 565 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 566 } 567 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 573 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 574 { 575 PetscErrorCode ierr; 576 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 577 578 PetscFunctionBegin; 579 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 580 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 581 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 582 ierr = PetscFree(multtrans);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 588 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 589 { 590 PetscErrorCode ierr; 591 PetscContainer container; 592 Mat_MatMatTransMult *multtrans=PETSC_NULL; 593 594 PetscFunctionBegin; 595 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 596 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 597 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 598 A->ops->destroy = multtrans->destroy; 599 if (A->ops->destroy) { 600 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 601 } 602 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 608 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 609 { 610 PetscErrorCode ierr; 611 Mat Bt; 612 PetscInt *bti,*btj; 613 Mat_MatMatTransMult *multtrans; 614 PetscContainer container; 615 PetscLogDouble t0,tf,etime2=0.0; 616 617 PetscFunctionBegin; 618 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 619 /* create symbolic Bt */ 620 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 621 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 622 623 /* get symbolic C=A*Bt */ 624 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 625 626 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 627 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 628 629 /* attach the supporting struct to C */ 630 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 631 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 632 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 633 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 634 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 635 636 multtrans->usecoloring = PETSC_FALSE; 637 multtrans->destroy = (*C)->ops->destroy; 638 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 639 640 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 641 etime2 += tf - t0; 642 643 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 644 if (multtrans->usecoloring){ 645 /* Create MatTransposeColoring from symbolic C=A*B^T */ 646 MatTransposeColoring matcoloring; 647 ISColoring iscoloring; 648 Mat Bt_dense,C_dense; 649 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 650 651 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 652 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 653 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 654 etime0 += tf - t0; 655 656 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 657 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 658 multtrans->matcoloring = matcoloring; 659 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 660 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 661 etime01 += tf - t0; 662 663 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 664 /* Create Bt_dense and C_dense = A*Bt_dense */ 665 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 666 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 667 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 668 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 669 Bt_dense->assembled = PETSC_TRUE; 670 multtrans->Bt_den = Bt_dense; 671 672 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 673 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 674 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 675 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 676 Bt_dense->assembled = PETSC_TRUE; 677 multtrans->ABt_den = C_dense; 678 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 679 etime1 += tf - t0; 680 681 #if defined(PETSC_USE_INFO) 682 { 683 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 684 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 685 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 686 } 687 #endif 688 } 689 /* clean up */ 690 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 691 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 692 693 694 695 #if defined(INEFFICIENT_ALGORITHM) 696 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 697 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 698 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 699 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 700 PetscInt am=A->rmap->N,bm=B->rmap->N; 701 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 702 MatScalar *ca; 703 PetscBT lnkbt; 704 PetscReal afill; 705 706 /* Allocate row pointer array ci */ 707 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 708 ci[0] = 0; 709 710 /* Create and initialize a linked list for C columns */ 711 nlnk = bm+1; 712 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 713 714 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 715 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 716 current_space = free_space; 717 718 /* Determine symbolic info for each row of the product A*B^T: */ 719 for (i=0; i<am; i++) { 720 anzi = ai[i+1] - ai[i]; 721 cnzi = 0; 722 acol = aj + ai[i]; 723 for (j=0; j<bm; j++){ 724 bnzj = bi[j+1] - bi[j]; 725 bcol= bj + bi[j]; 726 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 727 ka = 0; kb = 0; 728 while (ka < anzi && kb < bnzj){ 729 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 730 if (ka == anzi) break; 731 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 732 if (kb == bnzj) break; 733 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 734 index[0] = j; 735 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 736 cnzi++; 737 break; 738 } 739 } 740 } 741 742 /* If free space is not available, make more free space */ 743 /* Double the amount of total space in the list */ 744 if (current_space->local_remaining<cnzi) { 745 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 746 nspacedouble++; 747 } 748 749 /* Copy data into free space, then initialize lnk */ 750 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 751 current_space->array += cnzi; 752 current_space->local_used += cnzi; 753 current_space->local_remaining -= cnzi; 754 755 ci[i+1] = ci[i] + cnzi; 756 } 757 758 759 /* Column indices are in the list of free space. 760 Allocate array cj, copy column indices to cj, and destroy list of free space */ 761 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 762 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 763 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 764 765 /* Allocate space for ca */ 766 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 767 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 768 769 /* put together the new symbolic matrix */ 770 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 771 772 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 773 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 774 c = (Mat_SeqAIJ *)((*C)->data); 775 c->free_a = PETSC_TRUE; 776 c->free_ij = PETSC_TRUE; 777 c->nonew = 0; 778 779 /* set MatInfo */ 780 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 781 if (afill < 1.0) afill = 1.0; 782 c->maxnz = ci[am]; 783 c->nz = ci[am]; 784 (*C)->info.mallocs = nspacedouble; 785 (*C)->info.fill_ratio_given = fill; 786 (*C)->info.fill_ratio_needed = afill; 787 788 #if defined(PETSC_USE_INFO) 789 if (ci[am]) { 790 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 791 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 792 } else { 793 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 794 } 795 #endif 796 #endif 797 PetscFunctionReturn(0); 798 } 799 800 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 803 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 804 { 805 PetscErrorCode ierr; 806 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 807 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 808 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 809 PetscLogDouble flops=0.0; 810 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca,*cval; 811 Mat_MatMatTransMult *multtrans; 812 PetscContainer container; 813 #if defined(USE_ARRAY) 814 MatScalar *spdot; 815 #endif 816 817 PetscFunctionBegin; 818 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 819 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 820 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 821 if (multtrans->usecoloring){ 822 MatTransposeColoring matcoloring = multtrans->matcoloring; 823 Mat Bt_dense; 824 PetscInt m,n; 825 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 826 Mat C_dense = multtrans->ABt_den; 827 828 Bt_dense = multtrans->Bt_den; 829 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 830 831 /* Get Bt_dense by Apply MatTransposeColoring to B */ 832 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 833 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 834 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 835 etime0 += tf - t0; 836 837 /* C_dense = A*Bt_dense */ 838 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 839 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 840 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 841 etime2 += tf - t0; 842 843 /* Recover C from C_dense */ 844 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 845 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 846 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 847 etime1 += tf - t0; 848 #if defined(PETSC_USE_INFO) 849 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 850 #endif 851 PetscFunctionReturn(0); 852 } 853 854 #if defined(USE_ARRAY) 855 /* allocate an array for implementing sparse inner-product */ 856 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 857 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 858 #endif 859 860 /* clear old values in C */ 861 if (!c->a){ 862 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 863 c->a = ca; 864 c->free_a = PETSC_TRUE; 865 } else { 866 ca = c->a; 867 } 868 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 869 870 for (i=0; i<cm; i++) { 871 anzi = ai[i+1] - ai[i]; 872 acol = aj + ai[i]; 873 aval = aa + ai[i]; 874 cnzi = ci[i+1] - ci[i]; 875 ccol = cj + ci[i]; 876 cval = ca + ci[i]; 877 for (j=0; j<cnzi; j++){ 878 brow = ccol[j]; 879 bnzj = bi[brow+1] - bi[brow]; 880 bcol = bj + bi[brow]; 881 bval = ba + bi[brow]; 882 883 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 884 #if defined(USE_ARRAY) 885 /* put ba to spdot array */ 886 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 887 /* c(i,j)=A[i,:]*B[j,:]^T */ 888 for (nexta=0; nexta<anzi; nexta++){ 889 cval[j] += spdot[acol[nexta]]*aval[nexta]; 890 } 891 /* zero spdot array */ 892 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 893 #else 894 nexta = 0; nextb = 0; 895 while (nexta<anzi && nextb<bnzj){ 896 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 897 if (nexta == anzi) break; 898 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 899 if (nextb == bnzj) break; 900 if (acol[nexta] == bcol[nextb]){ 901 cval[j] += aval[nexta]*bval[nextb]; 902 nexta++; nextb++; 903 flops += 2; 904 } 905 } 906 #endif 907 } 908 } 909 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 910 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 911 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 912 #if defined(USE_ARRAY) 913 ierr = PetscFree(spdot); 914 #endif 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 920 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 if (scall == MAT_INITIAL_MATRIX){ 925 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 926 } 927 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 933 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 934 { 935 PetscErrorCode ierr; 936 Mat At; 937 PetscInt *ati,*atj; 938 939 PetscFunctionBegin; 940 /* create symbolic At */ 941 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 942 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 943 944 /* get symbolic C=At*B */ 945 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 946 947 /* clean up */ 948 ierr = MatDestroy(&At);CHKERRQ(ierr); 949 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 955 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 956 { 957 PetscErrorCode ierr; 958 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 959 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 960 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 961 PetscLogDouble flops=0.0; 962 MatScalar *aa=a->a,*ba,*ca,*caj; 963 964 PetscFunctionBegin; 965 if (!c->a){ 966 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 967 c->a = ca; 968 c->free_a = PETSC_TRUE; 969 } else { 970 ca = c->a; 971 } 972 /* clear old values in C */ 973 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 974 975 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 976 for (i=0;i<am;i++) { 977 bj = b->j + bi[i]; 978 ba = b->a + bi[i]; 979 bnzi = bi[i+1] - bi[i]; 980 anzi = ai[i+1] - ai[i]; 981 for (j=0; j<anzi; j++) { 982 nextb = 0; 983 crow = *aj++; 984 cjj = cj + ci[crow]; 985 caj = ca + ci[crow]; 986 /* perform sparse axpy operation. Note cjj includes bj. */ 987 for (k=0; nextb<bnzi; k++) { 988 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 989 caj[k] += (*aa)*(*(ba+nextb)); 990 nextb++; 991 } 992 } 993 flops += 2*bnzi; 994 aa++; 995 } 996 } 997 998 /* Assemble the final matrix and clean up */ 999 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1000 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1001 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 EXTERN_C_BEGIN 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 1008 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1009 { 1010 PetscErrorCode ierr; 1011 1012 PetscFunctionBegin; 1013 if (scall == MAT_INITIAL_MATRIX){ 1014 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 1015 } 1016 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 EXTERN_C_END 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 1023 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 1024 { 1025 PetscErrorCode ierr; 1026 1027 PetscFunctionBegin; 1028 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 1029 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 1035 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1036 { 1037 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1038 PetscErrorCode ierr; 1039 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1040 MatScalar *aa; 1041 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 1042 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 1043 1044 PetscFunctionBegin; 1045 if (!cm || !cn) PetscFunctionReturn(0); 1046 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 1047 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1048 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1049 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1050 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1051 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1052 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1053 colam = col*am; 1054 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1055 r1 = r2 = r3 = r4 = 0.0; 1056 n = a->i[i+1] - a->i[i]; 1057 aj = a->j + a->i[i]; 1058 aa = a->a + a->i[i]; 1059 for (j=0; j<n; j++) { 1060 r1 += (*aa)*b1[*aj]; 1061 r2 += (*aa)*b2[*aj]; 1062 r3 += (*aa)*b3[*aj]; 1063 r4 += (*aa++)*b4[*aj++]; 1064 } 1065 c[colam + i] = r1; 1066 c[colam + am + i] = r2; 1067 c[colam + am2 + i] = r3; 1068 c[colam + am3 + i] = r4; 1069 } 1070 b1 += bm4; 1071 b2 += bm4; 1072 b3 += bm4; 1073 b4 += bm4; 1074 } 1075 for (;col<cn; col++){ /* over extra columns of C */ 1076 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1077 r1 = 0.0; 1078 n = a->i[i+1] - a->i[i]; 1079 aj = a->j + a->i[i]; 1080 aa = a->a + a->i[i]; 1081 1082 for (j=0; j<n; j++) { 1083 r1 += (*aa++)*b1[*aj++]; 1084 } 1085 c[col*am + i] = r1; 1086 } 1087 b1 += bm; 1088 } 1089 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1090 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1091 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1092 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1093 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 /* 1098 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 1099 */ 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 1102 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1103 { 1104 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1107 MatScalar *aa; 1108 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 1109 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 1110 1111 PetscFunctionBegin; 1112 if (!cm || !cn) PetscFunctionReturn(0); 1113 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1114 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1115 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1116 1117 if (a->compressedrow.use){ /* use compressed row format */ 1118 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1119 colam = col*am; 1120 arm = a->compressedrow.nrows; 1121 ii = a->compressedrow.i; 1122 ridx = a->compressedrow.rindex; 1123 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1124 r1 = r2 = r3 = r4 = 0.0; 1125 n = ii[i+1] - ii[i]; 1126 aj = a->j + ii[i]; 1127 aa = a->a + ii[i]; 1128 for (j=0; j<n; j++) { 1129 r1 += (*aa)*b1[*aj]; 1130 r2 += (*aa)*b2[*aj]; 1131 r3 += (*aa)*b3[*aj]; 1132 r4 += (*aa++)*b4[*aj++]; 1133 } 1134 c[colam + ridx[i]] += r1; 1135 c[colam + am + ridx[i]] += r2; 1136 c[colam + am2 + ridx[i]] += r3; 1137 c[colam + am3 + ridx[i]] += r4; 1138 } 1139 b1 += bm4; 1140 b2 += bm4; 1141 b3 += bm4; 1142 b4 += bm4; 1143 } 1144 for (;col<cn; col++){ /* over extra columns of C */ 1145 colam = col*am; 1146 arm = a->compressedrow.nrows; 1147 ii = a->compressedrow.i; 1148 ridx = a->compressedrow.rindex; 1149 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1150 r1 = 0.0; 1151 n = ii[i+1] - ii[i]; 1152 aj = a->j + ii[i]; 1153 aa = a->a + ii[i]; 1154 1155 for (j=0; j<n; j++) { 1156 r1 += (*aa++)*b1[*aj++]; 1157 } 1158 c[col*am + ridx[i]] += r1; 1159 } 1160 b1 += bm; 1161 } 1162 } else { 1163 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1164 colam = col*am; 1165 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1166 r1 = r2 = r3 = r4 = 0.0; 1167 n = a->i[i+1] - a->i[i]; 1168 aj = a->j + a->i[i]; 1169 aa = a->a + a->i[i]; 1170 for (j=0; j<n; j++) { 1171 r1 += (*aa)*b1[*aj]; 1172 r2 += (*aa)*b2[*aj]; 1173 r3 += (*aa)*b3[*aj]; 1174 r4 += (*aa++)*b4[*aj++]; 1175 } 1176 c[colam + i] += r1; 1177 c[colam + am + i] += r2; 1178 c[colam + am2 + i] += r3; 1179 c[colam + am3 + i] += r4; 1180 } 1181 b1 += bm4; 1182 b2 += bm4; 1183 b3 += bm4; 1184 b4 += bm4; 1185 } 1186 for (;col<cn; col++){ /* over extra columns of C */ 1187 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1188 r1 = 0.0; 1189 n = a->i[i+1] - a->i[i]; 1190 aj = a->j + a->i[i]; 1191 aa = a->a + a->i[i]; 1192 1193 for (j=0; j<n; j++) { 1194 r1 += (*aa++)*b1[*aj++]; 1195 } 1196 c[col*am + i] += r1; 1197 } 1198 b1 += bm; 1199 } 1200 } 1201 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1202 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1203 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1209 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1210 { 1211 PetscErrorCode ierr; 1212 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1213 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1214 PetscInt *bi=b->i,*bj=b->j; 1215 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1216 MatScalar *btval,*btval_den,*ba=b->a; 1217 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1218 1219 PetscFunctionBegin; 1220 btval_den=btdense->v; 1221 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1222 for (k=0; k<ncolors; k++) { 1223 ncolumns = coloring->ncolumns[k]; 1224 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1225 col = *(columns + colorforcol[k] + l); 1226 btcol = bj + bi[col]; 1227 btval = ba + bi[col]; 1228 anz = bi[col+1] - bi[col]; 1229 for (j=0; j<anz; j++){ 1230 brow = btcol[j]; 1231 btval_den[brow] = btval[j]; 1232 } 1233 } 1234 btval_den += m; 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1241 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1242 { 1243 PetscErrorCode ierr; 1244 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1245 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1246 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1247 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1248 1249 PetscFunctionBegin; 1250 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1251 ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr); 1252 cp_den = ca_den; 1253 for (k=0; k<ncolors; k++) { 1254 nrows = matcoloring->nrows[k]; 1255 row = rows + colorforrow[k]; 1256 idx = spidx + colorforrow[k]; 1257 for (l=0; l<nrows; l++){ 1258 ca[idx[l]] = cp_den[row[l]]; 1259 } 1260 cp_den += m; 1261 } 1262 ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 /* 1267 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1268 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1269 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1270 */ 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1273 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1274 { 1275 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1276 PetscErrorCode ierr; 1277 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1278 PetscInt nz = a->i[m],row,*jj,mr,col; 1279 PetscInt *cspidx; 1280 1281 PetscFunctionBegin; 1282 *nn = n; 1283 if (!ia) PetscFunctionReturn(0); 1284 if (symmetric) { 1285 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1286 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 1287 } else { 1288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1289 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1290 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1291 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1292 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1293 jj = a->j; 1294 for (i=0; i<nz; i++) { 1295 collengths[jj[i]]++; 1296 } 1297 cia[0] = oshift; 1298 for (i=0; i<n; i++) { 1299 cia[i+1] = cia[i] + collengths[i]; 1300 } 1301 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1302 jj = a->j; 1303 for (row=0; row<m; row++) { 1304 mr = a->i[row+1] - a->i[row]; 1305 for (i=0; i<mr; i++) { 1306 col = *jj++; 1307 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1308 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1309 } 1310 } 1311 ierr = PetscFree(collengths);CHKERRQ(ierr); 1312 *ia = cia; *ja = cja; 1313 *spidx = cspidx; 1314 } 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1320 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1321 { 1322 PetscErrorCode ierr; 1323 1324 PetscFunctionBegin; 1325 if (!ia) PetscFunctionReturn(0); 1326 1327 ierr = PetscFree(*ia);CHKERRQ(ierr); 1328 ierr = PetscFree(*ja);CHKERRQ(ierr); 1329 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1335 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1336 { 1337 PetscErrorCode ierr; 1338 PetscInt i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm; 1339 const PetscInt *is; 1340 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1341 IS *isa; 1342 PetscBool done; 1343 PetscBool flg1,flg2; 1344 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1345 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1346 PetscInt *colorforcol,*columns,*columns_i; 1347 1348 PetscFunctionBegin; 1349 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1350 1351 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1352 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1353 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1354 if (flg1 || flg2) { 1355 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1356 } 1357 1358 N = mat->cmap->N/bs; 1359 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1360 c->N = mat->cmap->N/bs; 1361 c->m = mat->rmap->N/bs; 1362 c->rstart = 0; 1363 1364 c->ncolors = nis; 1365 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1366 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1367 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1368 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1369 colorforrow[0] = 0; 1370 rows_i = rows; 1371 columnsforspidx_i = columnsforspidx; 1372 1373 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1374 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1375 colorforcol[0] = 0; 1376 columns_i = columns; 1377 1378 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */ 1379 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 1380 1381 cm = c->m; 1382 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1383 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1384 for (i=0; i<nis; i++) { 1385 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1386 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1387 c->ncolumns[i] = n; 1388 if (n) { 1389 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1390 } 1391 colorforcol[i+1] = colorforcol[i] + n; 1392 columns_i += n; 1393 1394 /* fast, crude version requires O(N*N) work */ 1395 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1396 1397 /* loop over columns*/ 1398 for (j=0; j<n; j++) { 1399 col = is[j]; 1400 row_idx = cj + ci[col]; 1401 m = ci[col+1] - ci[col]; 1402 /* loop over columns marking them in rowhit */ 1403 for (k=0; k<m; k++) { 1404 idxhit[*row_idx] = spidx[ci[col] + k]; 1405 rowhit[*row_idx++] = col + 1; 1406 } 1407 } 1408 /* count the number of hits */ 1409 nrows = 0; 1410 for (j=0; j<cm; j++) { 1411 if (rowhit[j]) nrows++; 1412 } 1413 c->nrows[i] = nrows; 1414 colorforrow[i+1] = colorforrow[i] + nrows; 1415 1416 nrows = 0; 1417 for (j=0; j<cm; j++) { 1418 if (rowhit[j]) { 1419 rows_i[nrows] = j; 1420 columnsforspidx_i[nrows] = idxhit[j]; 1421 nrows++; 1422 } 1423 } 1424 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1425 rows_i += nrows; columnsforspidx_i += nrows; 1426 } 1427 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); 1428 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1429 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1430 #if defined(PETSC_USE_DEBUG) 1431 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1432 #endif 1433 1434 c->colorforrow = colorforrow; 1435 c->rows = rows; 1436 c->columnsforspidx = columnsforspidx; 1437 c->colorforcol = colorforcol; 1438 c->columns = columns; 1439 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442