1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 /* 12 #define DEBUG_MATMATMULT 13 */ 14 EXTERN_C_BEGIN 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 18 { 19 PetscErrorCode ierr; 20 PetscBool scalable=PETSC_FALSE,scalable_new=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 if (scall == MAT_INITIAL_MATRIX){ 24 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 25 ierr = PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,PETSC_NULL);CHKERRQ(ierr); 26 ierr = PetscOptionsBool("-matmatmult_scalable_new","Use a scalable but slower C=A*B","",scalable_new,&scalable_new,PETSC_NULL);CHKERRQ(ierr); 27 ierr = PetscOptionsEnd();CHKERRQ(ierr); 28 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 29 if (scalable_new){ 30 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,C);CHKERRQ(ierr); 31 } else if (scalable){ 32 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 33 } else { 34 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 35 } 36 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 37 } 38 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 39 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 40 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 EXTERN_C_END 44 45 /* 46 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B 47 Input Parameter: 48 . am, Ai, Aj - number of rows and structure of A 49 . bm, bn, Bi, Bj - number of rows, columns, and structure of B 50 . fill - filll ratio See MatMatMult() 51 52 Output Parameter: 53 . Ci, Cj - structure of C = A*B 54 . nspacedouble - number of extra mallocs 55 */ 56 /* 57 Does not use bitarray 58 */ 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new" 61 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 62 { 63 PetscErrorCode ierr; 64 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 65 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 66 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 67 const PetscInt *aj=Aj,*bi=Bi,*bj; 68 PetscInt *ci,*cj; 69 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 70 PetscInt lnk_max=bn,*lnk; 71 72 PetscFunctionBegin; 73 /* Allocate ci array, arrays for fill computation and */ 74 /* free space for accumulating nonzero column info */ 75 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 76 ci[0] = 0; 77 78 /* create and initialize a condensed linked list */ 79 crmax = a->rmax*b->rmax; 80 lnk_max = PetscMin(crmax,bn); 81 if (!lnk_max && bn) { 82 lnk_max = bn; /* in case rmax is not defined for A or B */ 83 #if defined(PETSC_USE_DEBUGGING) 84 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 85 #endif 86 } 87 #if defined(DEBUG_MATMATMULT) 88 ierr = (PETSC_COMM_SELF,"LLCondensedCreate lnk_max=%d, bn %d, crmax %d\n",lnk_max,bn,crmax); 89 #endif 90 ierr = PetscLLCondensedCreate_fast(lnk_max,&lnk);CHKERRQ(ierr); 91 92 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 93 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 94 current_space = free_space; 95 96 /* Determine ci and cj */ 97 for (i=0; i<am; i++) { 98 anzi = Ai[i+1] - Ai[i]; 99 cnzi = 0; 100 aj = Aj + Ai[i]; 101 for (j=0; j<anzi; j++){ 102 brow = aj[j]; 103 bnzj = bi[brow+1] - bi[brow]; 104 bj = Bj + bi[brow]; 105 /* add non-zero cols of B into the condensed sorted linked list lnk */ 106 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 107 ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr); 108 } 109 cnzi = lnk[1]; 110 ci[i+1] = ci[i] + cnzi; 111 112 /* If free space is not available, make more free space */ 113 /* Double the amount of total space in the list */ 114 if (current_space->local_remaining<cnzi) { 115 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 116 ndouble++; 117 } 118 119 /* Copy data into free space, then initialize lnk */ 120 /* ierr = PetscLLCondensedView_fast(lnk);CHKERRQ(ierr); */ 121 ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr); 122 /* PetscIntView(cnzi,current_space->array,0); */ 123 current_space->array += cnzi; 124 current_space->local_used += cnzi; 125 current_space->local_remaining -= cnzi; 126 } 127 128 /* Column indices are in the list of free space */ 129 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 130 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 131 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 132 ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr); 133 134 *Ci = ci; 135 *Cj = cj; 136 *nspacedouble = ndouble; 137 PetscFunctionReturn(0); 138 } 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 142 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 143 { 144 PetscErrorCode ierr; 145 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 146 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 147 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 148 PetscReal afill; 149 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0; 150 PetscBT lnkbt; 151 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 152 153 PetscFunctionBegin; 154 /* Get ci and cj */ 155 /*---------------*/ 156 /* Allocate ci array, arrays for fill computation and */ 157 /* free space for accumulating nonzero column info */ 158 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 159 ci[0] = 0; 160 161 /* create and initialize a linked list */ 162 nlnk_max = a->rmax*b->rmax; 163 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; 164 ierr = PetscLLCondensedCreate(nlnk_max,bn,&lnk,&lnkbt);CHKERRQ(ierr); 165 166 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 167 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 168 current_space = free_space; 169 170 /* Determine ci and cj */ 171 for (i=0; i<am; i++) { 172 anzi = ai[i+1] - ai[i]; 173 aj = a->j + ai[i]; 174 for (j=0; j<anzi; j++){ 175 brow = aj[j]; 176 bnzj = bi[brow+1] - bi[brow]; 177 bj = b->j + bi[brow]; 178 /* add non-zero cols of B into the sorted linked list lnk */ 179 ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr); 180 } 181 cnzi = lnk[0]; 182 183 /* If free space is not available, make more free space */ 184 /* Double the amount of total space in the list */ 185 if (current_space->local_remaining<cnzi) { 186 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 187 ndouble++; 188 } 189 190 /* Copy data into free space, then initialize lnk */ 191 ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr); 192 current_space->array += cnzi; 193 current_space->local_used += cnzi; 194 current_space->local_remaining -= cnzi; 195 ci[i+1] = ci[i] + cnzi; 196 } 197 198 /* Column indices are in the list of free space */ 199 /* Allocate space for cj, initialize cj, and */ 200 /* destroy list of free space and other temporary array(s) */ 201 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 202 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 203 ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr); 204 205 /* put together the new symbolic matrix */ 206 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,PETSC_NULL,C);CHKERRQ(ierr); 207 208 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 209 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 210 c = (Mat_SeqAIJ *)((*C)->data); 211 c->free_a = PETSC_FALSE; 212 c->free_ij = PETSC_TRUE; 213 c->nonew = 0; 214 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */ 215 216 /* set MatInfo */ 217 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 218 if (afill < 1.0) afill = 1.0; 219 c->maxnz = ci[am]; 220 c->nz = ci[am]; 221 (*C)->info.mallocs = ndouble; 222 (*C)->info.fill_ratio_given = fill; 223 (*C)->info.fill_ratio_needed = afill; 224 225 #if defined(PETSC_USE_INFO) 226 if (ci[am]) { 227 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 228 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 229 } else { 230 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 231 } 232 #endif 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 238 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 239 { 240 PetscErrorCode ierr; 241 PetscLogDouble flops=0.0; 242 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 243 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 244 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 245 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 246 PetscInt am=A->rmap->n,cm=C->rmap->n; 247 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 248 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp; 249 PetscScalar *ab_dense; 250 251 PetscFunctionBegin; 252 /* printf("MatMatMultNumeric_SeqAIJ_SeqAIJ...ca %p\n",c->a); */ 253 if (!c->a){ /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 254 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 255 c->a = ca; 256 c->free_a = PETSC_TRUE; 257 258 ierr = PetscMalloc(B->cmap->N*sizeof(PetscScalar),&ab_dense);CHKERRQ(ierr); 259 ierr = PetscMemzero(ab_dense,B->cmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 260 c->matmult_abdense = ab_dense; 261 } else { 262 ca = c->a; 263 ab_dense = c->matmult_abdense; 264 } 265 266 /* clean old values in C */ 267 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 268 /* Traverse A row-wise. */ 269 /* Build the ith row in C by summing over nonzero columns in A, */ 270 /* the rows of B corresponding to nonzeros of A. */ 271 for (i=0; i<am; i++) { 272 anzi = ai[i+1] - ai[i]; 273 for (j=0; j<anzi; j++) { 274 brow = aj[j]; 275 bnzi = bi[brow+1] - bi[brow]; 276 bjj = bj + bi[brow]; 277 baj = ba + bi[brow]; 278 /* perform dense axpy */ 279 valtmp = aa[j]; 280 for (k=0; k<bnzi; k++) { 281 ab_dense[bjj[k]] += valtmp*baj[k]; 282 } 283 flops += 2*bnzi; 284 } 285 aj += anzi; aa += anzi; 286 287 cnzi = ci[i+1] - ci[i]; 288 for (k=0; k<cnzi; k++) { 289 ca[k] += ab_dense[cj[k]]; 290 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 291 } 292 flops += cnzi; 293 cj += cnzi; ca += cnzi; 294 } 295 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 296 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 297 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable" 303 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 304 { 305 PetscErrorCode ierr; 306 PetscLogDouble flops=0.0; 307 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 308 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 309 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 310 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 311 PetscInt am=A->rmap->N,cm=C->rmap->N; 312 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 313 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 314 PetscInt nextb; 315 316 PetscFunctionBegin; 317 #if defined(DEBUG_MATMATMULT) 318 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable...\n");CHKERRQ(ierr); 319 #endif 320 /* clean old values in C */ 321 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 322 /* Traverse A row-wise. */ 323 /* Build the ith row in C by summing over nonzero columns in A, */ 324 /* the rows of B corresponding to nonzeros of A. */ 325 for (i=0;i<am;i++) { 326 anzi = ai[i+1] - ai[i]; 327 cnzi = ci[i+1] - ci[i]; 328 for (j=0;j<anzi;j++) { 329 brow = aj[j]; 330 bnzi = bi[brow+1] - bi[brow]; 331 bjj = bj + bi[brow]; 332 baj = ba + bi[brow]; 333 /* perform sparse axpy */ 334 valtmp = aa[j]; 335 nextb = 0; 336 for (k=0; nextb<bnzi; k++) { 337 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 338 ca[k] += valtmp*baj[nextb++]; 339 } 340 } 341 flops += 2*bnzi; 342 } 343 aj += anzi; aa += anzi; 344 cj += cnzi; ca += cnzi; 345 } 346 347 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 349 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new" 355 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,Mat *C) 356 { 357 PetscErrorCode ierr; 358 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 359 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 360 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 361 MatScalar *ca; 362 PetscReal afill; 363 364 PetscFunctionBegin; 365 #if defined(DEBUG_MATMATMULT) 366 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 367 #endif 368 /* Get ci and cj */ 369 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 370 371 /* Allocate space for ca */ 372 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 373 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 374 375 /* put together the new symbolic matrix */ 376 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 377 378 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 379 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 380 c = (Mat_SeqAIJ *)((*C)->data); 381 c->free_a = PETSC_TRUE; 382 c->free_ij = PETSC_TRUE; 383 c->nonew = 0; 384 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 385 386 /* set MatInfo */ 387 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 388 if (afill < 1.0) afill = 1.0; 389 c->maxnz = ci[am]; 390 c->nz = ci[am]; 391 (*C)->info.mallocs = nspacedouble; 392 (*C)->info.fill_ratio_given = fill; 393 (*C)->info.fill_ratio_needed = afill; 394 395 #if defined(PETSC_USE_INFO) 396 if (ci[am]) { 397 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 398 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 399 } else { 400 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 401 } 402 #endif 403 PetscFunctionReturn(0); 404 } 405 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable" 409 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) 410 { 411 PetscErrorCode ierr; 412 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 413 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 414 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 415 MatScalar *ca; 416 PetscReal afill; 417 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,nlnk_max,*lnk,ndouble=0; 418 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 419 420 PetscFunctionBegin; 421 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 422 /*---------------------------------------------------------------------------------------------*/ 423 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 424 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 425 ci[0] = 0; 426 427 /* create and initialize a linked list */ 428 nlnk_max = a->rmax*b->rmax; 429 if (!nlnk_max || nlnk_max > bn) nlnk_max = bn; /* in case rmax is not defined for A or B */ 430 ierr = PetscLLCondensedCreate_Scalable(nlnk_max,&lnk);CHKERRQ(ierr); 431 432 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 433 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 434 current_space = free_space; 435 436 /* Determine ci and cj */ 437 for (i=0; i<am; i++) { 438 anzi = ai[i+1] - ai[i]; 439 aj = a->j + ai[i]; 440 for (j=0; j<anzi; j++){ 441 brow = aj[j]; 442 bnzj = bi[brow+1] - bi[brow]; 443 bj = b->j + bi[brow]; 444 /* add non-zero cols of B into the sorted linked list lnk */ 445 ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr); 446 } 447 cnzi = lnk[0]; 448 449 /* If free space is not available, make more free space */ 450 /* Double the amount of total space in the list */ 451 if (current_space->local_remaining<cnzi) { 452 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 453 ndouble++; 454 } 455 456 /* Copy data into free space, then initialize lnk */ 457 ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr); 458 current_space->array += cnzi; 459 current_space->local_used += cnzi; 460 current_space->local_remaining -= cnzi; 461 ci[i+1] = ci[i] + cnzi; 462 } 463 464 /* Column indices are in the list of free space */ 465 /* Allocate space for cj, initialize cj, and */ 466 /* destroy list of free space and other temporary array(s) */ 467 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 468 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 469 ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr); 470 471 /* Allocate space for ca */ 472 /*-----------------------*/ 473 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 474 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 475 476 /* put together the new symbolic matrix */ 477 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 478 479 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 480 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 481 c = (Mat_SeqAIJ *)((*C)->data); 482 c->free_a = PETSC_TRUE; 483 c->free_ij = PETSC_TRUE; 484 c->nonew = 0; 485 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 486 487 /* set MatInfo */ 488 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 489 if (afill < 1.0) afill = 1.0; 490 c->maxnz = ci[am]; 491 c->nz = ci[am]; 492 (*C)->info.mallocs = ndouble; 493 (*C)->info.fill_ratio_given = fill; 494 (*C)->info.fill_ratio_needed = afill; 495 496 #if defined(PETSC_USE_INFO) 497 if (ci[am]) { 498 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr); 499 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 500 } else { 501 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 502 } 503 #endif 504 PetscFunctionReturn(0); 505 } 506 507 508 /* This routine is not used. Should be removed! */ 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 511 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 512 { 513 PetscErrorCode ierr; 514 515 PetscFunctionBegin; 516 if (scall == MAT_INITIAL_MATRIX){ 517 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 518 } 519 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 525 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 526 { 527 PetscErrorCode ierr; 528 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 529 530 PetscFunctionBegin; 531 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 532 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 533 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 534 ierr = PetscFree(multtrans);CHKERRQ(ierr); 535 PetscFunctionReturn(0); 536 } 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 540 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 541 { 542 PetscErrorCode ierr; 543 PetscContainer container; 544 Mat_MatMatTransMult *multtrans=PETSC_NULL; 545 546 PetscFunctionBegin; 547 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 548 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 549 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 550 A->ops->destroy = multtrans->destroy; 551 if (A->ops->destroy) { 552 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 553 } 554 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 560 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 561 { 562 PetscErrorCode ierr; 563 Mat Bt; 564 PetscInt *bti,*btj; 565 Mat_MatMatTransMult *multtrans; 566 PetscContainer container; 567 PetscLogDouble t0,tf,etime2=0.0; 568 569 PetscFunctionBegin; 570 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 571 /* create symbolic Bt */ 572 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 573 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 574 575 /* get symbolic C=A*Bt */ 576 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 577 578 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 579 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 580 581 /* attach the supporting struct to C */ 582 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 583 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 584 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 585 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 586 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 587 588 multtrans->usecoloring = PETSC_FALSE; 589 multtrans->destroy = (*C)->ops->destroy; 590 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 591 592 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 593 etime2 += tf - t0; 594 595 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 596 if (multtrans->usecoloring){ 597 /* Create MatTransposeColoring from symbolic C=A*B^T */ 598 MatTransposeColoring matcoloring; 599 ISColoring iscoloring; 600 Mat Bt_dense,C_dense; 601 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 602 603 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 604 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 605 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 606 etime0 += tf - t0; 607 608 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 609 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 610 multtrans->matcoloring = matcoloring; 611 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 612 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 613 etime01 += tf - t0; 614 615 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 616 /* Create Bt_dense and C_dense = A*Bt_dense */ 617 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 618 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 619 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 620 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 621 Bt_dense->assembled = PETSC_TRUE; 622 multtrans->Bt_den = Bt_dense; 623 624 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 625 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 626 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 627 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 628 Bt_dense->assembled = PETSC_TRUE; 629 multtrans->ABt_den = C_dense; 630 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 631 etime1 += tf - t0; 632 633 #if defined(PETSC_USE_INFO) 634 { 635 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 636 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 637 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 638 } 639 #endif 640 } 641 /* clean up */ 642 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 643 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 644 645 646 647 #if defined(INEFFICIENT_ALGORITHM) 648 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 649 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 650 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 651 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 652 PetscInt am=A->rmap->N,bm=B->rmap->N; 653 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 654 MatScalar *ca; 655 PetscBT lnkbt; 656 PetscReal afill; 657 658 /* Allocate row pointer array ci */ 659 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 660 ci[0] = 0; 661 662 /* Create and initialize a linked list for C columns */ 663 nlnk = bm+1; 664 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 665 666 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 667 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 668 current_space = free_space; 669 670 /* Determine symbolic info for each row of the product A*B^T: */ 671 for (i=0; i<am; i++) { 672 anzi = ai[i+1] - ai[i]; 673 cnzi = 0; 674 acol = aj + ai[i]; 675 for (j=0; j<bm; j++){ 676 bnzj = bi[j+1] - bi[j]; 677 bcol= bj + bi[j]; 678 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 679 ka = 0; kb = 0; 680 while (ka < anzi && kb < bnzj){ 681 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 682 if (ka == anzi) break; 683 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 684 if (kb == bnzj) break; 685 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 686 index[0] = j; 687 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 688 cnzi++; 689 break; 690 } 691 } 692 } 693 694 /* If free space is not available, make more free space */ 695 /* Double the amount of total space in the list */ 696 if (current_space->local_remaining<cnzi) { 697 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 698 nspacedouble++; 699 } 700 701 /* Copy data into free space, then initialize lnk */ 702 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 703 current_space->array += cnzi; 704 current_space->local_used += cnzi; 705 current_space->local_remaining -= cnzi; 706 707 ci[i+1] = ci[i] + cnzi; 708 } 709 710 711 /* Column indices are in the list of free space. 712 Allocate array cj, copy column indices to cj, and destroy list of free space */ 713 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 714 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 715 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 716 717 /* Allocate space for ca */ 718 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 719 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 720 721 /* put together the new symbolic matrix */ 722 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 723 724 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 725 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 726 c = (Mat_SeqAIJ *)((*C)->data); 727 c->free_a = PETSC_TRUE; 728 c->free_ij = PETSC_TRUE; 729 c->nonew = 0; 730 731 /* set MatInfo */ 732 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 733 if (afill < 1.0) afill = 1.0; 734 c->maxnz = ci[am]; 735 c->nz = ci[am]; 736 (*C)->info.mallocs = nspacedouble; 737 (*C)->info.fill_ratio_given = fill; 738 (*C)->info.fill_ratio_needed = afill; 739 740 #if defined(PETSC_USE_INFO) 741 if (ci[am]) { 742 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 743 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 744 } else { 745 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 746 } 747 #endif 748 #endif 749 PetscFunctionReturn(0); 750 } 751 752 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 755 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 756 { 757 PetscErrorCode ierr; 758 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 759 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 760 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 761 PetscLogDouble flops=0.0; 762 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca,*cval; 763 Mat_MatMatTransMult *multtrans; 764 PetscContainer container; 765 #if defined(USE_ARRAY) 766 MatScalar *spdot; 767 #endif 768 769 PetscFunctionBegin; 770 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 771 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 772 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 773 if (multtrans->usecoloring){ 774 MatTransposeColoring matcoloring = multtrans->matcoloring; 775 Mat Bt_dense; 776 PetscInt m,n; 777 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 778 Mat C_dense = multtrans->ABt_den; 779 780 Bt_dense = multtrans->Bt_den; 781 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 782 783 /* Get Bt_dense by Apply MatTransposeColoring to B */ 784 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 785 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 786 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 787 etime0 += tf - t0; 788 789 /* C_dense = A*Bt_dense */ 790 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 791 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 792 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 793 etime2 += tf - t0; 794 795 /* Recover C from C_dense */ 796 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 797 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 798 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 799 etime1 += tf - t0; 800 #if defined(PETSC_USE_INFO) 801 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 802 #endif 803 PetscFunctionReturn(0); 804 } 805 806 #if defined(USE_ARRAY) 807 /* allocate an array for implementing sparse inner-product */ 808 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 809 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 810 #endif 811 812 /* clear old values in C */ 813 if (!c->a){ 814 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 815 c->a = ca; 816 c->free_a = PETSC_TRUE; 817 } else { 818 ca = c->a; 819 } 820 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 821 822 for (i=0; i<cm; i++) { 823 anzi = ai[i+1] - ai[i]; 824 acol = aj + ai[i]; 825 aval = aa + ai[i]; 826 cnzi = ci[i+1] - ci[i]; 827 ccol = cj + ci[i]; 828 cval = ca + ci[i]; 829 for (j=0; j<cnzi; j++){ 830 brow = ccol[j]; 831 bnzj = bi[brow+1] - bi[brow]; 832 bcol = bj + bi[brow]; 833 bval = ba + bi[brow]; 834 835 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 836 #if defined(USE_ARRAY) 837 /* put ba to spdot array */ 838 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 839 /* c(i,j)=A[i,:]*B[j,:]^T */ 840 for (nexta=0; nexta<anzi; nexta++){ 841 cval[j] += spdot[acol[nexta]]*aval[nexta]; 842 } 843 /* zero spdot array */ 844 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 845 #else 846 nexta = 0; nextb = 0; 847 while (nexta<anzi && nextb<bnzj){ 848 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 849 if (nexta == anzi) break; 850 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 851 if (nextb == bnzj) break; 852 if (acol[nexta] == bcol[nextb]){ 853 cval[j] += aval[nexta]*bval[nextb]; 854 nexta++; nextb++; 855 flops += 2; 856 } 857 } 858 #endif 859 } 860 } 861 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 863 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 864 #if defined(USE_ARRAY) 865 ierr = PetscFree(spdot); 866 #endif 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 872 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 if (scall == MAT_INITIAL_MATRIX){ 877 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 878 } 879 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 885 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 886 { 887 PetscErrorCode ierr; 888 Mat At; 889 PetscInt *ati,*atj; 890 891 PetscFunctionBegin; 892 /* create symbolic At */ 893 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 894 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 895 896 /* get symbolic C=At*B */ 897 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 898 899 /* clean up */ 900 ierr = MatDestroy(&At);CHKERRQ(ierr); 901 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 907 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 908 { 909 PetscErrorCode ierr; 910 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 911 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 912 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 913 PetscLogDouble flops=0.0; 914 MatScalar *aa=a->a,*ba,*ca,*caj; 915 916 PetscFunctionBegin; 917 if (!c->a){ 918 ierr = PetscMalloc((ci[cm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 919 c->a = ca; 920 c->free_a = PETSC_TRUE; 921 } else { 922 ca = c->a; 923 } 924 /* clear old values in C */ 925 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 926 927 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 928 for (i=0;i<am;i++) { 929 bj = b->j + bi[i]; 930 ba = b->a + bi[i]; 931 bnzi = bi[i+1] - bi[i]; 932 anzi = ai[i+1] - ai[i]; 933 for (j=0; j<anzi; j++) { 934 nextb = 0; 935 crow = *aj++; 936 cjj = cj + ci[crow]; 937 caj = ca + ci[crow]; 938 /* perform sparse axpy operation. Note cjj includes bj. */ 939 for (k=0; nextb<bnzi; k++) { 940 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 941 caj[k] += (*aa)*(*(ba+nextb)); 942 nextb++; 943 } 944 } 945 flops += 2*bnzi; 946 aa++; 947 } 948 } 949 950 /* Assemble the final matrix and clean up */ 951 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 952 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 953 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 EXTERN_C_BEGIN 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 960 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 961 { 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 if (scall == MAT_INITIAL_MATRIX){ 966 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 967 } 968 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 EXTERN_C_END 972 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 975 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 976 { 977 PetscErrorCode ierr; 978 979 PetscFunctionBegin; 980 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 981 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 987 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 988 { 989 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 990 PetscErrorCode ierr; 991 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 992 MatScalar *aa; 993 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 994 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 995 996 PetscFunctionBegin; 997 if (!cm || !cn) PetscFunctionReturn(0); 998 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 999 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1000 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1001 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1002 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1003 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1004 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1005 colam = col*am; 1006 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1007 r1 = r2 = r3 = r4 = 0.0; 1008 n = a->i[i+1] - a->i[i]; 1009 aj = a->j + a->i[i]; 1010 aa = a->a + a->i[i]; 1011 for (j=0; j<n; j++) { 1012 r1 += (*aa)*b1[*aj]; 1013 r2 += (*aa)*b2[*aj]; 1014 r3 += (*aa)*b3[*aj]; 1015 r4 += (*aa++)*b4[*aj++]; 1016 } 1017 c[colam + i] = r1; 1018 c[colam + am + i] = r2; 1019 c[colam + am2 + i] = r3; 1020 c[colam + am3 + i] = r4; 1021 } 1022 b1 += bm4; 1023 b2 += bm4; 1024 b3 += bm4; 1025 b4 += bm4; 1026 } 1027 for (;col<cn; col++){ /* over extra columns of C */ 1028 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1029 r1 = 0.0; 1030 n = a->i[i+1] - a->i[i]; 1031 aj = a->j + a->i[i]; 1032 aa = a->a + a->i[i]; 1033 1034 for (j=0; j<n; j++) { 1035 r1 += (*aa++)*b1[*aj++]; 1036 } 1037 c[col*am + i] = r1; 1038 } 1039 b1 += bm; 1040 } 1041 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1042 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1043 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1044 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1045 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 /* 1050 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 1051 */ 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 1054 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1055 { 1056 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1059 MatScalar *aa; 1060 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 1061 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 1062 1063 PetscFunctionBegin; 1064 if (!cm || !cn) PetscFunctionReturn(0); 1065 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1066 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1067 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1068 1069 if (a->compressedrow.use){ /* use compressed row format */ 1070 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1071 colam = col*am; 1072 arm = a->compressedrow.nrows; 1073 ii = a->compressedrow.i; 1074 ridx = a->compressedrow.rindex; 1075 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1076 r1 = r2 = r3 = r4 = 0.0; 1077 n = ii[i+1] - ii[i]; 1078 aj = a->j + ii[i]; 1079 aa = a->a + ii[i]; 1080 for (j=0; j<n; j++) { 1081 r1 += (*aa)*b1[*aj]; 1082 r2 += (*aa)*b2[*aj]; 1083 r3 += (*aa)*b3[*aj]; 1084 r4 += (*aa++)*b4[*aj++]; 1085 } 1086 c[colam + ridx[i]] += r1; 1087 c[colam + am + ridx[i]] += r2; 1088 c[colam + am2 + ridx[i]] += r3; 1089 c[colam + am3 + ridx[i]] += r4; 1090 } 1091 b1 += bm4; 1092 b2 += bm4; 1093 b3 += bm4; 1094 b4 += bm4; 1095 } 1096 for (;col<cn; col++){ /* over extra columns of C */ 1097 colam = col*am; 1098 arm = a->compressedrow.nrows; 1099 ii = a->compressedrow.i; 1100 ridx = a->compressedrow.rindex; 1101 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1102 r1 = 0.0; 1103 n = ii[i+1] - ii[i]; 1104 aj = a->j + ii[i]; 1105 aa = a->a + ii[i]; 1106 1107 for (j=0; j<n; j++) { 1108 r1 += (*aa++)*b1[*aj++]; 1109 } 1110 c[col*am + ridx[i]] += r1; 1111 } 1112 b1 += bm; 1113 } 1114 } else { 1115 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1116 colam = col*am; 1117 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1118 r1 = r2 = r3 = r4 = 0.0; 1119 n = a->i[i+1] - a->i[i]; 1120 aj = a->j + a->i[i]; 1121 aa = a->a + a->i[i]; 1122 for (j=0; j<n; j++) { 1123 r1 += (*aa)*b1[*aj]; 1124 r2 += (*aa)*b2[*aj]; 1125 r3 += (*aa)*b3[*aj]; 1126 r4 += (*aa++)*b4[*aj++]; 1127 } 1128 c[colam + i] += r1; 1129 c[colam + am + i] += r2; 1130 c[colam + am2 + i] += r3; 1131 c[colam + am3 + i] += r4; 1132 } 1133 b1 += bm4; 1134 b2 += bm4; 1135 b3 += bm4; 1136 b4 += bm4; 1137 } 1138 for (;col<cn; col++){ /* over extra columns of C */ 1139 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1140 r1 = 0.0; 1141 n = a->i[i+1] - a->i[i]; 1142 aj = a->j + a->i[i]; 1143 aa = a->a + a->i[i]; 1144 1145 for (j=0; j<n; j++) { 1146 r1 += (*aa++)*b1[*aj++]; 1147 } 1148 c[col*am + i] += r1; 1149 } 1150 b1 += bm; 1151 } 1152 } 1153 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1154 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1155 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1161 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1162 { 1163 PetscErrorCode ierr; 1164 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1165 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1166 PetscInt *bi=b->i,*bj=b->j; 1167 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1168 MatScalar *btval,*btval_den,*ba=b->a; 1169 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1170 1171 PetscFunctionBegin; 1172 btval_den=btdense->v; 1173 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1174 for (k=0; k<ncolors; k++) { 1175 ncolumns = coloring->ncolumns[k]; 1176 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1177 col = *(columns + colorforcol[k] + l); 1178 btcol = bj + bi[col]; 1179 btval = ba + bi[col]; 1180 anz = bi[col+1] - bi[col]; 1181 for (j=0; j<anz; j++){ 1182 brow = btcol[j]; 1183 btval_den[brow] = btval[j]; 1184 } 1185 } 1186 btval_den += m; 1187 } 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1193 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1194 { 1195 PetscErrorCode ierr; 1196 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1197 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1198 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1199 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1200 1201 PetscFunctionBegin; 1202 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1203 ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr); 1204 cp_den = ca_den; 1205 for (k=0; k<ncolors; k++) { 1206 nrows = matcoloring->nrows[k]; 1207 row = rows + colorforrow[k]; 1208 idx = spidx + colorforrow[k]; 1209 for (l=0; l<nrows; l++){ 1210 ca[idx[l]] = cp_den[row[l]]; 1211 } 1212 cp_den += m; 1213 } 1214 ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 /* 1219 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1220 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1221 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1222 */ 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1225 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1226 { 1227 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1228 PetscErrorCode ierr; 1229 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1230 PetscInt nz = a->i[m],row,*jj,mr,col; 1231 PetscInt *cspidx; 1232 1233 PetscFunctionBegin; 1234 *nn = n; 1235 if (!ia) PetscFunctionReturn(0); 1236 if (symmetric) { 1237 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1238 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1241 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1242 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1243 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1244 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1245 jj = a->j; 1246 for (i=0; i<nz; i++) { 1247 collengths[jj[i]]++; 1248 } 1249 cia[0] = oshift; 1250 for (i=0; i<n; i++) { 1251 cia[i+1] = cia[i] + collengths[i]; 1252 } 1253 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1254 jj = a->j; 1255 for (row=0; row<m; row++) { 1256 mr = a->i[row+1] - a->i[row]; 1257 for (i=0; i<mr; i++) { 1258 col = *jj++; 1259 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1260 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1261 } 1262 } 1263 ierr = PetscFree(collengths);CHKERRQ(ierr); 1264 *ia = cia; *ja = cja; 1265 *spidx = cspidx; 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1272 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1273 { 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 if (!ia) PetscFunctionReturn(0); 1278 1279 ierr = PetscFree(*ia);CHKERRQ(ierr); 1280 ierr = PetscFree(*ja);CHKERRQ(ierr); 1281 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1287 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1288 { 1289 PetscErrorCode ierr; 1290 PetscInt i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm; 1291 const PetscInt *is; 1292 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1293 IS *isa; 1294 PetscBool done; 1295 PetscBool flg1,flg2; 1296 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1297 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1298 PetscInt *colorforcol,*columns,*columns_i; 1299 1300 PetscFunctionBegin; 1301 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1302 1303 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1304 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1305 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1306 if (flg1 || flg2) { 1307 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1308 } 1309 1310 N = mat->cmap->N/bs; 1311 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1312 c->N = mat->cmap->N/bs; 1313 c->m = mat->rmap->N/bs; 1314 c->rstart = 0; 1315 1316 c->ncolors = nis; 1317 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1318 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1319 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1320 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1321 colorforrow[0] = 0; 1322 rows_i = rows; 1323 columnsforspidx_i = columnsforspidx; 1324 1325 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1326 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1327 colorforcol[0] = 0; 1328 columns_i = columns; 1329 1330 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */ 1331 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 1332 1333 cm = c->m; 1334 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1335 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1336 for (i=0; i<nis; i++) { 1337 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1338 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1339 c->ncolumns[i] = n; 1340 if (n) { 1341 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1342 } 1343 colorforcol[i+1] = colorforcol[i] + n; 1344 columns_i += n; 1345 1346 /* fast, crude version requires O(N*N) work */ 1347 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1348 1349 /* loop over columns*/ 1350 for (j=0; j<n; j++) { 1351 col = is[j]; 1352 row_idx = cj + ci[col]; 1353 m = ci[col+1] - ci[col]; 1354 /* loop over columns marking them in rowhit */ 1355 for (k=0; k<m; k++) { 1356 idxhit[*row_idx] = spidx[ci[col] + k]; 1357 rowhit[*row_idx++] = col + 1; 1358 } 1359 } 1360 /* count the number of hits */ 1361 nrows = 0; 1362 for (j=0; j<cm; j++) { 1363 if (rowhit[j]) nrows++; 1364 } 1365 c->nrows[i] = nrows; 1366 colorforrow[i+1] = colorforrow[i] + nrows; 1367 1368 nrows = 0; 1369 for (j=0; j<cm; j++) { 1370 if (rowhit[j]) { 1371 rows_i[nrows] = j; 1372 columnsforspidx_i[nrows] = idxhit[j]; 1373 nrows++; 1374 } 1375 } 1376 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1377 rows_i += nrows; columnsforspidx_i += nrows; 1378 } 1379 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); 1380 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1381 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1382 #if defined(PETSC_USE_DEBUG) 1383 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1384 #endif 1385 1386 c->colorforrow = colorforrow; 1387 c->rows = rows; 1388 c->columnsforspidx = columnsforspidx; 1389 c->colorforcol = colorforcol; 1390 c->columns = columns; 1391 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394