1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 12 EXTERN_C_BEGIN 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 15 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 16 { 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 if (scall == MAT_INITIAL_MATRIX){ 21 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 22 } 23 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 24 PetscFunctionReturn(0); 25 } 26 EXTERN_C_END 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 30 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 31 { 32 PetscErrorCode ierr; 33 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 34 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 35 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 36 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 37 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 38 MatScalar *ca; 39 PetscBT lnkbt; 40 PetscReal afill; 41 42 PetscFunctionBegin; 43 /* Set up */ 44 /* Allocate ci array, arrays for fill computation and */ 45 /* free space for accumulating nonzero column info */ 46 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 47 ci[0] = 0; 48 49 /* create and initialize a linked list */ 50 nlnk = bn+1; 51 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 52 53 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 54 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 55 current_space = free_space; 56 57 /* Determine symbolic info for each row of the product: */ 58 for (i=0;i<am;i++) { 59 anzi = ai[i+1] - ai[i]; 60 cnzi = 0; 61 j = anzi; 62 aj = a->j + ai[i]; 63 while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 64 j--; 65 brow = *(aj + j); 66 bnzj = bi[brow+1] - bi[brow]; 67 bjj = bj + bi[brow]; 68 /* add non-zero cols of B into the sorted linked list lnk */ 69 ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 70 cnzi += nlnk; 71 } 72 73 /* If free space is not available, make more free space */ 74 /* Double the amount of total space in the list */ 75 if (current_space->local_remaining<cnzi) { 76 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 77 nspacedouble++; 78 } 79 80 /* Copy data into free space, then initialize lnk */ 81 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 82 current_space->array += cnzi; 83 current_space->local_used += cnzi; 84 current_space->local_remaining -= cnzi; 85 86 ci[i+1] = ci[i] + cnzi; 87 } 88 89 /* Column indices are in the list of free space */ 90 /* Allocate space for cj, initialize cj, and */ 91 /* destroy list of free space and other temporary array(s) */ 92 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 93 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 94 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 95 96 /* Allocate space for ca */ 97 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 98 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 99 100 /* put together the new symbolic matrix */ 101 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 102 103 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 104 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 105 c = (Mat_SeqAIJ *)((*C)->data); 106 c->free_a = PETSC_TRUE; 107 c->free_ij = PETSC_TRUE; 108 c->nonew = 0; 109 110 /* set MatInfo */ 111 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 112 if (afill < 1.0) afill = 1.0; 113 c->maxnz = ci[am]; 114 c->nz = ci[am]; 115 (*C)->info.mallocs = nspacedouble; 116 (*C)->info.fill_ratio_given = fill; 117 (*C)->info.fill_ratio_needed = afill; 118 119 #if defined(PETSC_USE_INFO) 120 if (ci[am]) { 121 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 122 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 123 } else { 124 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 125 } 126 #endif 127 PetscFunctionReturn(0); 128 } 129 130 131 #undef __FUNCT__ 132 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 133 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 134 { 135 PetscErrorCode ierr; 136 PetscLogDouble flops=0.0; 137 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 138 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 139 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 140 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 141 PetscInt am=A->rmap->N,cm=C->rmap->N; 142 PetscInt i,j,k,anzi,bnzi,cnzi,brow,nextb; 143 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a; 144 145 PetscFunctionBegin; 146 /* clean old values in C */ 147 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 148 /* Traverse A row-wise. */ 149 /* Build the ith row in C by summing over nonzero columns in A, */ 150 /* the rows of B corresponding to nonzeros of A. */ 151 for (i=0;i<am;i++) { 152 anzi = ai[i+1] - ai[i]; 153 for (j=0;j<anzi;j++) { 154 brow = *aj++; 155 bnzi = bi[brow+1] - bi[brow]; 156 bjj = bj + bi[brow]; 157 baj = ba + bi[brow]; 158 nextb = 0; 159 for (k=0; nextb<bnzi; k++) { 160 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 161 ca[k] += (*aa)*baj[nextb++]; 162 } 163 } 164 flops += 2*bnzi; 165 aa++; 166 } 167 cnzi = ci[i+1] - ci[i]; 168 ca += cnzi; 169 cj += cnzi; 170 } 171 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 173 174 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 175 PetscFunctionReturn(0); 176 } 177 178 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 181 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 182 PetscErrorCode ierr; 183 184 PetscFunctionBegin; 185 if (scall == MAT_INITIAL_MATRIX){ 186 ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 187 } 188 ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 194 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 195 { 196 PetscErrorCode ierr; 197 Mat At; 198 PetscInt *ati,*atj; 199 200 PetscFunctionBegin; 201 /* create symbolic At */ 202 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 203 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 204 205 /* get symbolic C=At*B */ 206 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 207 208 /* clean up */ 209 ierr = MatDestroy(&At);CHKERRQ(ierr); 210 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 216 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 217 { 218 PetscErrorCode ierr; 219 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 220 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 221 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 222 PetscLogDouble flops=0.0; 223 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 224 225 PetscFunctionBegin; 226 /* clear old values in C */ 227 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 228 229 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 230 for (i=0;i<am;i++) { 231 bj = b->j + bi[i]; 232 ba = b->a + bi[i]; 233 bnzi = bi[i+1] - bi[i]; 234 anzi = ai[i+1] - ai[i]; 235 for (j=0; j<anzi; j++) { 236 nextb = 0; 237 crow = *aj++; 238 cjj = cj + ci[crow]; 239 caj = ca + ci[crow]; 240 /* perform sparse axpy operation. Note cjj includes bj. */ 241 for (k=0; nextb<bnzi; k++) { 242 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 243 caj[k] += (*aa)*(*(ba+nextb)); 244 nextb++; 245 } 246 } 247 flops += 2*bnzi; 248 aa++; 249 } 250 } 251 252 /* Assemble the final matrix and clean up */ 253 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 254 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 255 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 EXTERN_C_BEGIN 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 262 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 263 { 264 PetscErrorCode ierr; 265 266 PetscFunctionBegin; 267 if (scall == MAT_INITIAL_MATRIX){ 268 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 269 } 270 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 EXTERN_C_END 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 277 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 278 { 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 288 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 289 { 290 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 291 PetscErrorCode ierr; 292 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 293 MatScalar *aa; 294 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 295 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 296 297 PetscFunctionBegin; 298 if (!cm || !cn) PetscFunctionReturn(0); 299 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 300 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 301 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 302 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 303 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 304 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 305 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 306 colam = col*am; 307 for (i=0; i<am; i++) { /* over rows of C in those columns */ 308 r1 = r2 = r3 = r4 = 0.0; 309 n = a->i[i+1] - a->i[i]; 310 aj = a->j + a->i[i]; 311 aa = a->a + a->i[i]; 312 for (j=0; j<n; j++) { 313 r1 += (*aa)*b1[*aj]; 314 r2 += (*aa)*b2[*aj]; 315 r3 += (*aa)*b3[*aj]; 316 r4 += (*aa++)*b4[*aj++]; 317 } 318 c[colam + i] = r1; 319 c[colam + am + i] = r2; 320 c[colam + am2 + i] = r3; 321 c[colam + am3 + i] = r4; 322 } 323 b1 += bm4; 324 b2 += bm4; 325 b3 += bm4; 326 b4 += bm4; 327 } 328 for (;col<cn; col++){ /* over extra columns of C */ 329 for (i=0; i<am; i++) { /* over rows of C in those columns */ 330 r1 = 0.0; 331 n = a->i[i+1] - a->i[i]; 332 aj = a->j + a->i[i]; 333 aa = a->a + a->i[i]; 334 335 for (j=0; j<n; j++) { 336 r1 += (*aa++)*b1[*aj++]; 337 } 338 c[col*am + i] = r1; 339 } 340 b1 += bm; 341 } 342 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 343 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 344 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 345 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 346 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 /* 351 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 352 */ 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 355 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 356 { 357 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 358 PetscErrorCode ierr; 359 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 360 MatScalar *aa; 361 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 362 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 363 364 PetscFunctionBegin; 365 if (!cm || !cn) PetscFunctionReturn(0); 366 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 367 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 368 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 369 370 if (a->compressedrow.use){ /* use compressed row format */ 371 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 372 colam = col*am; 373 arm = a->compressedrow.nrows; 374 ii = a->compressedrow.i; 375 ridx = a->compressedrow.rindex; 376 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 377 r1 = r2 = r3 = r4 = 0.0; 378 n = ii[i+1] - ii[i]; 379 aj = a->j + ii[i]; 380 aa = a->a + ii[i]; 381 for (j=0; j<n; j++) { 382 r1 += (*aa)*b1[*aj]; 383 r2 += (*aa)*b2[*aj]; 384 r3 += (*aa)*b3[*aj]; 385 r4 += (*aa++)*b4[*aj++]; 386 } 387 c[colam + ridx[i]] += r1; 388 c[colam + am + ridx[i]] += r2; 389 c[colam + am2 + ridx[i]] += r3; 390 c[colam + am3 + ridx[i]] += r4; 391 } 392 b1 += bm4; 393 b2 += bm4; 394 b3 += bm4; 395 b4 += bm4; 396 } 397 for (;col<cn; col++){ /* over extra columns of C */ 398 colam = col*am; 399 arm = a->compressedrow.nrows; 400 ii = a->compressedrow.i; 401 ridx = a->compressedrow.rindex; 402 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 403 r1 = 0.0; 404 n = ii[i+1] - ii[i]; 405 aj = a->j + ii[i]; 406 aa = a->a + ii[i]; 407 408 for (j=0; j<n; j++) { 409 r1 += (*aa++)*b1[*aj++]; 410 } 411 c[col*am + ridx[i]] += r1; 412 } 413 b1 += bm; 414 } 415 } else { 416 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 417 colam = col*am; 418 for (i=0; i<am; i++) { /* over rows of C in those columns */ 419 r1 = r2 = r3 = r4 = 0.0; 420 n = a->i[i+1] - a->i[i]; 421 aj = a->j + a->i[i]; 422 aa = a->a + a->i[i]; 423 for (j=0; j<n; j++) { 424 r1 += (*aa)*b1[*aj]; 425 r2 += (*aa)*b2[*aj]; 426 r3 += (*aa)*b3[*aj]; 427 r4 += (*aa++)*b4[*aj++]; 428 } 429 c[colam + i] += r1; 430 c[colam + am + i] += r2; 431 c[colam + am2 + i] += r3; 432 c[colam + am3 + i] += r4; 433 } 434 b1 += bm4; 435 b2 += bm4; 436 b3 += bm4; 437 b4 += bm4; 438 } 439 for (;col<cn; col++){ /* over extra columns of C */ 440 for (i=0; i<am; i++) { /* over rows of C in those columns */ 441 r1 = 0.0; 442 n = a->i[i+1] - a->i[i]; 443 aj = a->j + a->i[i]; 444 aa = a->a + a->i[i]; 445 446 for (j=0; j<n; j++) { 447 r1 += (*aa++)*b1[*aj++]; 448 } 449 c[col*am + i] += r1; 450 } 451 b1 += bm; 452 } 453 } 454 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 455 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 456 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459