1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 /* 12 #define DEBUG_MATMATMULT 13 */ 14 EXTERN_C_BEGIN 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 18 { 19 PetscErrorCode ierr; 20 PetscBool scalable=PETSC_FALSE,scalable_new=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 if (scall == MAT_INITIAL_MATRIX){ 24 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 25 ierr = PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,PETSC_NULL);CHKERRQ(ierr); 26 ierr = PetscOptionsBool("-matmatmult_scalable_new","Use a scalable but slower C=A*B","",scalable_new,&scalable_new,PETSC_NULL);CHKERRQ(ierr); 27 ierr = PetscOptionsEnd();CHKERRQ(ierr); 28 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 29 if (scalable_new){ 30 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,C);CHKERRQ(ierr); 31 } else if (scalable){ 32 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 33 } else { 34 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 35 } 36 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 37 } 38 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 39 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 40 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 EXTERN_C_END 44 45 /* 46 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B 47 Input Parameter: 48 . am, Ai, Aj - number of rows and structure of A 49 . bm, bn, Bi, Bj - number of rows, columns, and structure of B 50 . fill - filll ratio See MatMatMult() 51 52 Output Parameter: 53 . Ci, Cj - structure of C = A*B 54 . nspacedouble - number of extra mallocs 55 */ 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ" 58 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 59 { 60 PetscErrorCode ierr; 61 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 62 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 63 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 64 const PetscInt *aj=Aj,*bi=Bi,*bj=Bj,*bjj; 65 PetscInt *ci,*cj,nlnk_max; 66 PetscInt i,j,anzi,brow,bnzj,cnzi,*lnk,ndouble=0; 67 PetscBT lnkbt; 68 69 PetscFunctionBegin; 70 /* Allocate ci array, arrays for fill computation and */ 71 /* free space for accumulating nonzero column info */ 72 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 73 ci[0] = 0; 74 75 /* create and initialize a linked list */ 76 nlnk_max = a->rmax*b->rmax; 77 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; 78 ierr = PetscLLCondensedCreate(nlnk_max,bn,&lnk,&lnkbt);CHKERRQ(ierr); 79 80 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 81 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 82 current_space = free_space; 83 84 /* Determine ci and cj */ 85 for (i=0; i<am; i++) { 86 anzi = Ai[i+1] - Ai[i]; 87 aj = Aj + Ai[i]; 88 for (j=0; j<anzi; j++){ 89 brow = aj[j]; 90 bnzj = bi[brow+1] - bi[brow]; 91 bjj = bj + bi[brow]; 92 /* add non-zero cols of B into the sorted linked list lnk */ 93 ierr = PetscLLCondensedAddSorted(bnzj,bjj,lnk,lnkbt);CHKERRQ(ierr); 94 } 95 cnzi = lnk[0]; 96 97 /* If free space is not available, make more free space */ 98 /* Double the amount of total space in the list */ 99 if (current_space->local_remaining<cnzi) { 100 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 101 ndouble++; 102 } 103 104 /* Copy data into free space, then initialize lnk */ 105 ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr); 106 current_space->array += cnzi; 107 current_space->local_used += cnzi; 108 current_space->local_remaining -= cnzi; 109 ci[i+1] = ci[i] + cnzi; 110 } 111 112 /* Column indices are in the list of free space */ 113 /* Allocate space for cj, initialize cj, and */ 114 /* destroy list of free space and other temporary array(s) */ 115 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 116 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 117 ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr); 118 119 *Ci = ci; 120 *Cj = cj; 121 *nspacedouble = ndouble; 122 PetscFunctionReturn(0); 123 } 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable" 127 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 128 { 129 PetscErrorCode ierr; 130 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 131 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 132 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 133 const PetscInt *aj=Aj,*bi=Bi,*bj; 134 PetscInt *ci,*cj; 135 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 136 PetscInt nlnk_max,*lnk; 137 138 PetscFunctionBegin; 139 /* Allocate ci array, arrays for fill computation and */ 140 /* free space for accumulating nonzero column info */ 141 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 142 ci[0] = 0; 143 144 /* create and initialize a condensed linked list */ 145 crmax = a->rmax*b->rmax; 146 nlnk_max = PetscMin(crmax,bn); 147 if (!nlnk_max && bn) { 148 nlnk_max = bn; /* in case rmax is not defined for A or B */ 149 #if defined(PETSC_USE_DEBUGGING) 150 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 151 #endif 152 } 153 #if defined(DEBUG_MATMATMULT) 154 ierr = (PETSC_COMM_SELF,"LLCondensedCreate nlnk_max=%d, bn %d, crmax %d\n",nlnk_max,bn,crmax); 155 #endif 156 157 ierr = PetscLLCondensedCreate_Scalable(nlnk_max,&lnk);CHKERRQ(ierr); 158 159 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 160 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 161 current_space = free_space; 162 163 /* Determine ci and cj */ 164 for (i=0; i<am; i++) { 165 anzi = Ai[i+1] - Ai[i]; 166 cnzi = 0; 167 aj = Aj + Ai[i]; 168 for (j=0; j<anzi; j++){ 169 brow = aj[j]; 170 bnzj = bi[brow+1] - bi[brow]; 171 bj = Bj + bi[brow]; 172 /* add non-zero cols of B into the condensed sorted linked list lnk */ 173 ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr); 174 } 175 cnzi = lnk[0]; 176 ci[i+1] = ci[i] + cnzi; 177 178 /* If free space is not available, make more free space */ 179 /* Double the amount of total space in the list */ 180 if (current_space->local_remaining<cnzi) { 181 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 182 ndouble++; 183 } 184 185 /* Copy data into free space, then initialize lnk */ 186 ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr); 187 current_space->array += cnzi; 188 current_space->local_used += cnzi; 189 current_space->local_remaining -= cnzi; 190 } 191 192 /* Column indices are in the list of free space */ 193 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 194 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 195 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 196 ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr); 197 198 *Ci = ci; 199 *Cj = cj; 200 *nspacedouble = ndouble; 201 PetscFunctionReturn(0); 202 } 203 204 /* 205 Does not use bitarray 206 */ 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new" 209 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 210 { 211 PetscErrorCode ierr; 212 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 213 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 214 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 215 const PetscInt *aj=Aj,*bi=Bi,*bj; 216 PetscInt *ci,*cj; 217 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 218 PetscInt lnk_max=bn,*lnk; 219 220 PetscFunctionBegin; 221 /* Allocate ci array, arrays for fill computation and */ 222 /* free space for accumulating nonzero column info */ 223 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 224 ci[0] = 0; 225 226 /* create and initialize a condensed linked list */ 227 crmax = a->rmax*b->rmax; 228 lnk_max = PetscMin(crmax,bn); 229 if (!lnk_max && bn) { 230 lnk_max = bn; /* in case rmax is not defined for A or B */ 231 #if defined(PETSC_USE_DEBUGGING) 232 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 233 #endif 234 } 235 #if defined(DEBUG_MATMATMULT) 236 ierr = (PETSC_COMM_SELF,"LLCondensedCreate lnk_max=%d, bn %d, crmax %d\n",lnk_max,bn,crmax); 237 #endif 238 ierr = PetscLLCondensedCreate_fast(lnk_max,&lnk);CHKERRQ(ierr); 239 240 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 241 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 242 current_space = free_space; 243 244 /* Determine ci and cj */ 245 for (i=0; i<am; i++) { 246 anzi = Ai[i+1] - Ai[i]; 247 cnzi = 0; 248 aj = Aj + Ai[i]; 249 for (j=0; j<anzi; j++){ 250 brow = aj[j]; 251 bnzj = bi[brow+1] - bi[brow]; 252 bj = Bj + bi[brow]; 253 /* add non-zero cols of B into the condensed sorted linked list lnk */ 254 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 255 ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr); 256 } 257 cnzi = lnk[1]; 258 ci[i+1] = ci[i] + cnzi; 259 260 /* If free space is not available, make more free space */ 261 /* Double the amount of total space in the list */ 262 if (current_space->local_remaining<cnzi) { 263 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 264 ndouble++; 265 } 266 267 /* Copy data into free space, then initialize lnk */ 268 /* ierr = PetscLLCondensedView_fast(lnk);CHKERRQ(ierr); */ 269 ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr); 270 /* PetscIntView(cnzi,current_space->array,0); */ 271 current_space->array += cnzi; 272 current_space->local_used += cnzi; 273 current_space->local_remaining -= cnzi; 274 } 275 276 /* Column indices are in the list of free space */ 277 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 278 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 279 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 280 ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr); 281 282 *Ci = ci; 283 *Cj = cj; 284 *nspacedouble = ndouble; 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 290 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 291 { 292 PetscErrorCode ierr; 293 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 294 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 295 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 296 MatScalar *ca; 297 PetscReal afill; 298 299 PetscFunctionBegin; 300 /* Get ci and cj */ 301 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 302 303 /* Allocate space for ca */ 304 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 305 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 306 307 /* put together the new symbolic matrix */ 308 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 309 310 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 311 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 312 c = (Mat_SeqAIJ *)((*C)->data); 313 c->free_a = PETSC_TRUE; 314 c->free_ij = PETSC_TRUE; 315 c->nonew = 0; 316 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */ 317 ierr = PetscMalloc(bn*sizeof(PetscScalar),&c->matmult_abdense);CHKERRQ(ierr); 318 ierr = PetscMemzero(c->matmult_abdense,bn*sizeof(PetscScalar));CHKERRQ(ierr); 319 320 /* set MatInfo */ 321 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 322 if (afill < 1.0) afill = 1.0; 323 c->maxnz = ci[am]; 324 c->nz = ci[am]; 325 (*C)->info.mallocs = nspacedouble; 326 (*C)->info.fill_ratio_given = fill; 327 (*C)->info.fill_ratio_needed = afill; 328 329 #if defined(PETSC_USE_INFO) 330 if (ci[am]) { 331 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 332 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 333 } else { 334 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 335 } 336 #endif 337 PetscFunctionReturn(0); 338 } 339 340 #undef __FUNCT__ 341 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 342 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 343 { 344 PetscErrorCode ierr; 345 PetscLogDouble flops=0.0; 346 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 347 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 348 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 349 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 350 PetscInt am=A->rmap->n,cm=C->rmap->n; 351 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 352 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 353 PetscScalar *ab_dense=c->matmult_abdense; 354 355 PetscFunctionBegin; 356 /* clean old values in C */ 357 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 358 /* Traverse A row-wise. */ 359 /* Build the ith row in C by summing over nonzero columns in A, */ 360 /* the rows of B corresponding to nonzeros of A. */ 361 for (i=0; i<am; i++) { 362 anzi = ai[i+1] - ai[i]; 363 for (j=0; j<anzi; j++) { 364 brow = aj[j]; 365 bnzi = bi[brow+1] - bi[brow]; 366 bjj = bj + bi[brow]; 367 baj = ba + bi[brow]; 368 /* perform dense axpy */ 369 valtmp = aa[j]; 370 for (k=0; k<bnzi; k++) { 371 ab_dense[bjj[k]] += valtmp*baj[k]; 372 } 373 flops += 2*bnzi; 374 } 375 aj += anzi; aa += anzi; 376 377 cnzi = ci[i+1] - ci[i]; 378 for (k=0; k<cnzi; k++) { 379 ca[k] += ab_dense[cj[k]]; 380 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 381 } 382 flops += cnzi; 383 cj += cnzi; ca += cnzi; 384 } 385 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 386 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 387 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable" 393 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 394 { 395 PetscErrorCode ierr; 396 PetscLogDouble flops=0.0; 397 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 398 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 399 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 400 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 401 PetscInt am=A->rmap->N,cm=C->rmap->N; 402 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 403 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 404 PetscInt nextb; 405 406 PetscFunctionBegin; 407 #if defined(DEBUG_MATMATMULT) 408 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable...\n");CHKERRQ(ierr); 409 #endif 410 /* clean old values in C */ 411 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 412 /* Traverse A row-wise. */ 413 /* Build the ith row in C by summing over nonzero columns in A, */ 414 /* the rows of B corresponding to nonzeros of A. */ 415 for (i=0;i<am;i++) { 416 anzi = ai[i+1] - ai[i]; 417 cnzi = ci[i+1] - ci[i]; 418 for (j=0;j<anzi;j++) { 419 brow = aj[j]; 420 bnzi = bi[brow+1] - bi[brow]; 421 bjj = bj + bi[brow]; 422 baj = ba + bi[brow]; 423 /* perform sparse axpy */ 424 valtmp = aa[j]; 425 nextb = 0; 426 for (k=0; nextb<bnzi; k++) { 427 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 428 ca[k] += valtmp*baj[nextb++]; 429 } 430 } 431 flops += 2*bnzi; 432 } 433 aj += anzi; aa += anzi; 434 cj += cnzi; ca += cnzi; 435 } 436 437 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 438 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 439 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new" 445 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,Mat *C) 446 { 447 PetscErrorCode ierr; 448 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 449 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 450 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 451 MatScalar *ca; 452 PetscReal afill; 453 454 PetscFunctionBegin; 455 #if defined(DEBUG_MATMATMULT) 456 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 457 #endif 458 /* Get ci and cj */ 459 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 460 461 /* Allocate space for ca */ 462 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 463 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 464 465 /* put together the new symbolic matrix */ 466 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 467 468 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 469 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 470 c = (Mat_SeqAIJ *)((*C)->data); 471 c->free_a = PETSC_TRUE; 472 c->free_ij = PETSC_TRUE; 473 c->nonew = 0; 474 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 475 476 /* set MatInfo */ 477 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 478 if (afill < 1.0) afill = 1.0; 479 c->maxnz = ci[am]; 480 c->nz = ci[am]; 481 (*C)->info.mallocs = nspacedouble; 482 (*C)->info.fill_ratio_given = fill; 483 (*C)->info.fill_ratio_needed = afill; 484 485 #if defined(PETSC_USE_INFO) 486 if (ci[am]) { 487 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 488 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 489 } else { 490 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 491 } 492 #endif 493 PetscFunctionReturn(0); 494 } 495 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable" 499 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) 500 { 501 PetscErrorCode ierr; 502 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 503 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 504 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 505 MatScalar *ca; 506 PetscReal afill; 507 508 PetscFunctionBegin; 509 /* Get ci and cj */ 510 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 511 512 /* Allocate space for ca */ 513 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 514 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 515 516 /* put together the new symbolic matrix */ 517 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 518 519 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 520 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 521 c = (Mat_SeqAIJ *)((*C)->data); 522 c->free_a = PETSC_TRUE; 523 c->free_ij = PETSC_TRUE; 524 c->nonew = 0; 525 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 526 527 /* set MatInfo */ 528 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 529 if (afill < 1.0) afill = 1.0; 530 c->maxnz = ci[am]; 531 c->nz = ci[am]; 532 (*C)->info.mallocs = nspacedouble; 533 (*C)->info.fill_ratio_given = fill; 534 (*C)->info.fill_ratio_needed = afill; 535 536 #if defined(PETSC_USE_INFO) 537 if (ci[am]) { 538 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 539 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 540 } else { 541 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 542 } 543 #endif 544 PetscFunctionReturn(0); 545 } 546 547 548 /* This routine is not used. Should be removed! */ 549 #undef __FUNCT__ 550 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 551 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 552 { 553 PetscErrorCode ierr; 554 555 PetscFunctionBegin; 556 if (scall == MAT_INITIAL_MATRIX){ 557 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 558 } 559 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 565 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 566 { 567 PetscErrorCode ierr; 568 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 569 570 PetscFunctionBegin; 571 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 572 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 573 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 574 ierr = PetscFree(multtrans);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 580 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 581 { 582 PetscErrorCode ierr; 583 PetscContainer container; 584 Mat_MatMatTransMult *multtrans=PETSC_NULL; 585 586 PetscFunctionBegin; 587 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 588 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 589 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 590 A->ops->destroy = multtrans->destroy; 591 if (A->ops->destroy) { 592 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 593 } 594 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 600 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 601 { 602 PetscErrorCode ierr; 603 Mat Bt; 604 PetscInt *bti,*btj; 605 Mat_MatMatTransMult *multtrans; 606 PetscContainer container; 607 PetscLogDouble t0,tf,etime2=0.0; 608 609 PetscFunctionBegin; 610 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 611 /* create symbolic Bt */ 612 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 613 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 614 615 /* get symbolic C=A*Bt */ 616 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 617 618 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 619 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 620 621 /* attach the supporting struct to C */ 622 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 623 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 624 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 625 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 626 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 627 628 multtrans->usecoloring = PETSC_FALSE; 629 multtrans->destroy = (*C)->ops->destroy; 630 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 631 632 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 633 etime2 += tf - t0; 634 635 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 636 if (multtrans->usecoloring){ 637 /* Create MatTransposeColoring from symbolic C=A*B^T */ 638 MatTransposeColoring matcoloring; 639 ISColoring iscoloring; 640 Mat Bt_dense,C_dense; 641 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 642 643 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 644 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 645 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 646 etime0 += tf - t0; 647 648 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 649 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 650 multtrans->matcoloring = matcoloring; 651 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 652 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 653 etime01 += tf - t0; 654 655 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 656 /* Create Bt_dense and C_dense = A*Bt_dense */ 657 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 658 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 659 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 660 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 661 Bt_dense->assembled = PETSC_TRUE; 662 multtrans->Bt_den = Bt_dense; 663 664 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 665 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 666 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 667 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 668 Bt_dense->assembled = PETSC_TRUE; 669 multtrans->ABt_den = C_dense; 670 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 671 etime1 += tf - t0; 672 673 #if defined(PETSC_USE_INFO) 674 { 675 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 676 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 677 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 678 } 679 #endif 680 } 681 /* clean up */ 682 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 683 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 684 685 686 687 #if defined(INEFFICIENT_ALGORITHM) 688 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 689 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 690 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 691 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 692 PetscInt am=A->rmap->N,bm=B->rmap->N; 693 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 694 MatScalar *ca; 695 PetscBT lnkbt; 696 PetscReal afill; 697 698 /* Allocate row pointer array ci */ 699 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 700 ci[0] = 0; 701 702 /* Create and initialize a linked list for C columns */ 703 nlnk = bm+1; 704 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 705 706 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 707 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 708 current_space = free_space; 709 710 /* Determine symbolic info for each row of the product A*B^T: */ 711 for (i=0; i<am; i++) { 712 anzi = ai[i+1] - ai[i]; 713 cnzi = 0; 714 acol = aj + ai[i]; 715 for (j=0; j<bm; j++){ 716 bnzj = bi[j+1] - bi[j]; 717 bcol= bj + bi[j]; 718 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 719 ka = 0; kb = 0; 720 while (ka < anzi && kb < bnzj){ 721 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 722 if (ka == anzi) break; 723 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 724 if (kb == bnzj) break; 725 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 726 index[0] = j; 727 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 728 cnzi++; 729 break; 730 } 731 } 732 } 733 734 /* If free space is not available, make more free space */ 735 /* Double the amount of total space in the list */ 736 if (current_space->local_remaining<cnzi) { 737 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 738 nspacedouble++; 739 } 740 741 /* Copy data into free space, then initialize lnk */ 742 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 743 current_space->array += cnzi; 744 current_space->local_used += cnzi; 745 current_space->local_remaining -= cnzi; 746 747 ci[i+1] = ci[i] + cnzi; 748 } 749 750 751 /* Column indices are in the list of free space. 752 Allocate array cj, copy column indices to cj, and destroy list of free space */ 753 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 754 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 755 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 756 757 /* Allocate space for ca */ 758 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 759 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 760 761 /* put together the new symbolic matrix */ 762 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 763 764 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 765 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 766 c = (Mat_SeqAIJ *)((*C)->data); 767 c->free_a = PETSC_TRUE; 768 c->free_ij = PETSC_TRUE; 769 c->nonew = 0; 770 771 /* set MatInfo */ 772 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 773 if (afill < 1.0) afill = 1.0; 774 c->maxnz = ci[am]; 775 c->nz = ci[am]; 776 (*C)->info.mallocs = nspacedouble; 777 (*C)->info.fill_ratio_given = fill; 778 (*C)->info.fill_ratio_needed = afill; 779 780 #if defined(PETSC_USE_INFO) 781 if (ci[am]) { 782 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 783 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 784 } else { 785 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 786 } 787 #endif 788 #endif 789 PetscFunctionReturn(0); 790 } 791 792 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 795 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 796 { 797 PetscErrorCode ierr; 798 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 799 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 800 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 801 PetscLogDouble flops=0.0; 802 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca=c->a,*cval; 803 Mat_MatMatTransMult *multtrans; 804 PetscContainer container; 805 #if defined(USE_ARRAY) 806 MatScalar *spdot; 807 #endif 808 809 PetscFunctionBegin; 810 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 811 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 812 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 813 if (multtrans->usecoloring){ 814 MatTransposeColoring matcoloring = multtrans->matcoloring; 815 Mat Bt_dense; 816 PetscInt m,n; 817 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 818 Mat C_dense = multtrans->ABt_den; 819 820 Bt_dense = multtrans->Bt_den; 821 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 822 823 /* Get Bt_dense by Apply MatTransposeColoring to B */ 824 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 825 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 826 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 827 etime0 += tf - t0; 828 829 /* C_dense = A*Bt_dense */ 830 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 831 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 832 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 833 etime2 += tf - t0; 834 835 /* Recover C from C_dense */ 836 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 837 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 838 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 839 etime1 += tf - t0; 840 #if defined(PETSC_USE_INFO) 841 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 842 #endif 843 PetscFunctionReturn(0); 844 } 845 846 #if defined(USE_ARRAY) 847 /* allocate an array for implementing sparse inner-product */ 848 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 849 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 850 #endif 851 852 /* clear old values in C */ 853 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 854 855 for (i=0; i<cm; i++) { 856 anzi = ai[i+1] - ai[i]; 857 acol = aj + ai[i]; 858 aval = aa + ai[i]; 859 cnzi = ci[i+1] - ci[i]; 860 ccol = cj + ci[i]; 861 cval = ca + ci[i]; 862 for (j=0; j<cnzi; j++){ 863 brow = ccol[j]; 864 bnzj = bi[brow+1] - bi[brow]; 865 bcol = bj + bi[brow]; 866 bval = ba + bi[brow]; 867 868 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 869 #if defined(USE_ARRAY) 870 /* put ba to spdot array */ 871 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 872 /* c(i,j)=A[i,:]*B[j,:]^T */ 873 for (nexta=0; nexta<anzi; nexta++){ 874 cval[j] += spdot[acol[nexta]]*aval[nexta]; 875 } 876 /* zero spdot array */ 877 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 878 #else 879 nexta = 0; nextb = 0; 880 while (nexta<anzi && nextb<bnzj){ 881 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 882 if (nexta == anzi) break; 883 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 884 if (nextb == bnzj) break; 885 if (acol[nexta] == bcol[nextb]){ 886 cval[j] += aval[nexta]*bval[nextb]; 887 nexta++; nextb++; 888 flops += 2; 889 } 890 } 891 #endif 892 } 893 } 894 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 897 #if defined(USE_ARRAY) 898 ierr = PetscFree(spdot); 899 #endif 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 905 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 906 PetscErrorCode ierr; 907 908 PetscFunctionBegin; 909 if (scall == MAT_INITIAL_MATRIX){ 910 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 911 } 912 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 918 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 919 { 920 PetscErrorCode ierr; 921 Mat At; 922 PetscInt *ati,*atj; 923 924 PetscFunctionBegin; 925 /* create symbolic At */ 926 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 927 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 928 929 /* get symbolic C=At*B */ 930 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 931 932 /* clean up */ 933 ierr = MatDestroy(&At);CHKERRQ(ierr); 934 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 935 PetscFunctionReturn(0); 936 } 937 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 940 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 941 { 942 PetscErrorCode ierr; 943 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 944 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 945 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 946 PetscLogDouble flops=0.0; 947 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 948 949 PetscFunctionBegin; 950 /* clear old values in C */ 951 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 952 953 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 954 for (i=0;i<am;i++) { 955 bj = b->j + bi[i]; 956 ba = b->a + bi[i]; 957 bnzi = bi[i+1] - bi[i]; 958 anzi = ai[i+1] - ai[i]; 959 for (j=0; j<anzi; j++) { 960 nextb = 0; 961 crow = *aj++; 962 cjj = cj + ci[crow]; 963 caj = ca + ci[crow]; 964 /* perform sparse axpy operation. Note cjj includes bj. */ 965 for (k=0; nextb<bnzi; k++) { 966 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 967 caj[k] += (*aa)*(*(ba+nextb)); 968 nextb++; 969 } 970 } 971 flops += 2*bnzi; 972 aa++; 973 } 974 } 975 976 /* Assemble the final matrix and clean up */ 977 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 EXTERN_C_BEGIN 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 986 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 987 { 988 PetscErrorCode ierr; 989 990 PetscFunctionBegin; 991 if (scall == MAT_INITIAL_MATRIX){ 992 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 993 } 994 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 995 PetscFunctionReturn(0); 996 } 997 EXTERN_C_END 998 999 #undef __FUNCT__ 1000 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 1001 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 1002 { 1003 PetscErrorCode ierr; 1004 1005 PetscFunctionBegin; 1006 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 1007 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 1013 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1014 { 1015 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1016 PetscErrorCode ierr; 1017 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1018 MatScalar *aa; 1019 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 1020 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 1021 1022 PetscFunctionBegin; 1023 if (!cm || !cn) PetscFunctionReturn(0); 1024 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 1025 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1026 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1027 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1028 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1029 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1030 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1031 colam = col*am; 1032 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1033 r1 = r2 = r3 = r4 = 0.0; 1034 n = a->i[i+1] - a->i[i]; 1035 aj = a->j + a->i[i]; 1036 aa = a->a + a->i[i]; 1037 for (j=0; j<n; j++) { 1038 r1 += (*aa)*b1[*aj]; 1039 r2 += (*aa)*b2[*aj]; 1040 r3 += (*aa)*b3[*aj]; 1041 r4 += (*aa++)*b4[*aj++]; 1042 } 1043 c[colam + i] = r1; 1044 c[colam + am + i] = r2; 1045 c[colam + am2 + i] = r3; 1046 c[colam + am3 + i] = r4; 1047 } 1048 b1 += bm4; 1049 b2 += bm4; 1050 b3 += bm4; 1051 b4 += bm4; 1052 } 1053 for (;col<cn; col++){ /* over extra columns of C */ 1054 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1055 r1 = 0.0; 1056 n = a->i[i+1] - a->i[i]; 1057 aj = a->j + a->i[i]; 1058 aa = a->a + a->i[i]; 1059 1060 for (j=0; j<n; j++) { 1061 r1 += (*aa++)*b1[*aj++]; 1062 } 1063 c[col*am + i] = r1; 1064 } 1065 b1 += bm; 1066 } 1067 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1068 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1069 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1070 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1071 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 /* 1076 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 1077 */ 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 1080 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1081 { 1082 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1085 MatScalar *aa; 1086 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 1087 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 1088 1089 PetscFunctionBegin; 1090 if (!cm || !cn) PetscFunctionReturn(0); 1091 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1092 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1093 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1094 1095 if (a->compressedrow.use){ /* use compressed row format */ 1096 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1097 colam = col*am; 1098 arm = a->compressedrow.nrows; 1099 ii = a->compressedrow.i; 1100 ridx = a->compressedrow.rindex; 1101 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1102 r1 = r2 = r3 = r4 = 0.0; 1103 n = ii[i+1] - ii[i]; 1104 aj = a->j + ii[i]; 1105 aa = a->a + ii[i]; 1106 for (j=0; j<n; j++) { 1107 r1 += (*aa)*b1[*aj]; 1108 r2 += (*aa)*b2[*aj]; 1109 r3 += (*aa)*b3[*aj]; 1110 r4 += (*aa++)*b4[*aj++]; 1111 } 1112 c[colam + ridx[i]] += r1; 1113 c[colam + am + ridx[i]] += r2; 1114 c[colam + am2 + ridx[i]] += r3; 1115 c[colam + am3 + ridx[i]] += r4; 1116 } 1117 b1 += bm4; 1118 b2 += bm4; 1119 b3 += bm4; 1120 b4 += bm4; 1121 } 1122 for (;col<cn; col++){ /* over extra columns of C */ 1123 colam = col*am; 1124 arm = a->compressedrow.nrows; 1125 ii = a->compressedrow.i; 1126 ridx = a->compressedrow.rindex; 1127 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1128 r1 = 0.0; 1129 n = ii[i+1] - ii[i]; 1130 aj = a->j + ii[i]; 1131 aa = a->a + ii[i]; 1132 1133 for (j=0; j<n; j++) { 1134 r1 += (*aa++)*b1[*aj++]; 1135 } 1136 c[col*am + ridx[i]] += r1; 1137 } 1138 b1 += bm; 1139 } 1140 } else { 1141 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1142 colam = col*am; 1143 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1144 r1 = r2 = r3 = r4 = 0.0; 1145 n = a->i[i+1] - a->i[i]; 1146 aj = a->j + a->i[i]; 1147 aa = a->a + a->i[i]; 1148 for (j=0; j<n; j++) { 1149 r1 += (*aa)*b1[*aj]; 1150 r2 += (*aa)*b2[*aj]; 1151 r3 += (*aa)*b3[*aj]; 1152 r4 += (*aa++)*b4[*aj++]; 1153 } 1154 c[colam + i] += r1; 1155 c[colam + am + i] += r2; 1156 c[colam + am2 + i] += r3; 1157 c[colam + am3 + i] += r4; 1158 } 1159 b1 += bm4; 1160 b2 += bm4; 1161 b3 += bm4; 1162 b4 += bm4; 1163 } 1164 for (;col<cn; col++){ /* over extra columns of C */ 1165 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1166 r1 = 0.0; 1167 n = a->i[i+1] - a->i[i]; 1168 aj = a->j + a->i[i]; 1169 aa = a->a + a->i[i]; 1170 1171 for (j=0; j<n; j++) { 1172 r1 += (*aa++)*b1[*aj++]; 1173 } 1174 c[col*am + i] += r1; 1175 } 1176 b1 += bm; 1177 } 1178 } 1179 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1180 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1181 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1187 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1188 { 1189 PetscErrorCode ierr; 1190 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1191 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1192 PetscInt *bi=b->i,*bj=b->j; 1193 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1194 MatScalar *btval,*btval_den,*ba=b->a; 1195 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1196 1197 PetscFunctionBegin; 1198 btval_den=btdense->v; 1199 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1200 for (k=0; k<ncolors; k++) { 1201 ncolumns = coloring->ncolumns[k]; 1202 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1203 col = *(columns + colorforcol[k] + l); 1204 btcol = bj + bi[col]; 1205 btval = ba + bi[col]; 1206 anz = bi[col+1] - bi[col]; 1207 for (j=0; j<anz; j++){ 1208 brow = btcol[j]; 1209 btval_den[brow] = btval[j]; 1210 } 1211 } 1212 btval_den += m; 1213 } 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1219 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1220 { 1221 PetscErrorCode ierr; 1222 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1223 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1224 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1225 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1226 1227 PetscFunctionBegin; 1228 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1229 ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr); 1230 cp_den = ca_den; 1231 for (k=0; k<ncolors; k++) { 1232 nrows = matcoloring->nrows[k]; 1233 row = rows + colorforrow[k]; 1234 idx = spidx + colorforrow[k]; 1235 for (l=0; l<nrows; l++){ 1236 ca[idx[l]] = cp_den[row[l]]; 1237 } 1238 cp_den += m; 1239 } 1240 ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 /* 1245 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1246 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1247 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1248 */ 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1251 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1252 { 1253 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1254 PetscErrorCode ierr; 1255 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1256 PetscInt nz = a->i[m],row,*jj,mr,col; 1257 PetscInt *cspidx; 1258 1259 PetscFunctionBegin; 1260 *nn = n; 1261 if (!ia) PetscFunctionReturn(0); 1262 if (symmetric) { 1263 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1264 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 1265 } else { 1266 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1267 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1268 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1269 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1270 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1271 jj = a->j; 1272 for (i=0; i<nz; i++) { 1273 collengths[jj[i]]++; 1274 } 1275 cia[0] = oshift; 1276 for (i=0; i<n; i++) { 1277 cia[i+1] = cia[i] + collengths[i]; 1278 } 1279 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1280 jj = a->j; 1281 for (row=0; row<m; row++) { 1282 mr = a->i[row+1] - a->i[row]; 1283 for (i=0; i<mr; i++) { 1284 col = *jj++; 1285 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1286 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1287 } 1288 } 1289 ierr = PetscFree(collengths);CHKERRQ(ierr); 1290 *ia = cia; *ja = cja; 1291 *spidx = cspidx; 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1298 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1299 { 1300 PetscErrorCode ierr; 1301 1302 PetscFunctionBegin; 1303 if (!ia) PetscFunctionReturn(0); 1304 1305 ierr = PetscFree(*ia);CHKERRQ(ierr); 1306 ierr = PetscFree(*ja);CHKERRQ(ierr); 1307 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1313 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1314 { 1315 PetscErrorCode ierr; 1316 PetscInt i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm; 1317 const PetscInt *is; 1318 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1319 IS *isa; 1320 PetscBool done; 1321 PetscBool flg1,flg2; 1322 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1323 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1324 PetscInt *colorforcol,*columns,*columns_i; 1325 1326 PetscFunctionBegin; 1327 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1328 1329 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1330 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1331 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1332 if (flg1 || flg2) { 1333 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1334 } 1335 1336 N = mat->cmap->N/bs; 1337 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1338 c->N = mat->cmap->N/bs; 1339 c->m = mat->rmap->N/bs; 1340 c->rstart = 0; 1341 1342 c->ncolors = nis; 1343 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1344 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1345 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1346 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1347 colorforrow[0] = 0; 1348 rows_i = rows; 1349 columnsforspidx_i = columnsforspidx; 1350 1351 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1352 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1353 colorforcol[0] = 0; 1354 columns_i = columns; 1355 1356 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */ 1357 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 1358 1359 cm = c->m; 1360 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1361 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1362 for (i=0; i<nis; i++) { 1363 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1364 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1365 c->ncolumns[i] = n; 1366 if (n) { 1367 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1368 } 1369 colorforcol[i+1] = colorforcol[i] + n; 1370 columns_i += n; 1371 1372 /* fast, crude version requires O(N*N) work */ 1373 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1374 1375 /* loop over columns*/ 1376 for (j=0; j<n; j++) { 1377 col = is[j]; 1378 row_idx = cj + ci[col]; 1379 m = ci[col+1] - ci[col]; 1380 /* loop over columns marking them in rowhit */ 1381 for (k=0; k<m; k++) { 1382 idxhit[*row_idx] = spidx[ci[col] + k]; 1383 rowhit[*row_idx++] = col + 1; 1384 } 1385 } 1386 /* count the number of hits */ 1387 nrows = 0; 1388 for (j=0; j<cm; j++) { 1389 if (rowhit[j]) nrows++; 1390 } 1391 c->nrows[i] = nrows; 1392 colorforrow[i+1] = colorforrow[i] + nrows; 1393 1394 nrows = 0; 1395 for (j=0; j<cm; j++) { 1396 if (rowhit[j]) { 1397 rows_i[nrows] = j; 1398 columnsforspidx_i[nrows] = idxhit[j]; 1399 nrows++; 1400 } 1401 } 1402 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1403 rows_i += nrows; columnsforspidx_i += nrows; 1404 } 1405 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); 1406 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1407 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1408 #if defined(PETSC_USE_DEBUG) 1409 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1410 #endif 1411 1412 c->colorforrow = colorforrow; 1413 c->rows = rows; 1414 c->columnsforspidx = columnsforspidx; 1415 c->colorforcol = colorforcol; 1416 c->columns = columns; 1417 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420