1 #define PETSCMAT_DLL 2 3 /* 4 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 5 C = A * B 6 */ 7 8 #include "../src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 9 #include "../src/mat/utils/freespace.h" 10 #include "petscbt.h" 11 #include "../src/mat/impls/dense/seq/dense.h" /*I "petscmat.h" I*/ 12 13 EXTERN_C_BEGIN 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 16 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 17 { 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 if (scall == MAT_INITIAL_MATRIX){ 22 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 23 } 24 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 25 PetscFunctionReturn(0); 26 } 27 EXTERN_C_END 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 31 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 32 { 33 PetscErrorCode ierr; 34 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 35 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 36 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 37 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 38 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 39 MatScalar *ca; 40 PetscBT lnkbt; 41 PetscReal afill; 42 43 PetscFunctionBegin; 44 /* Set up */ 45 /* Allocate ci array, arrays for fill computation and */ 46 /* free space for accumulating nonzero column info */ 47 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 48 ci[0] = 0; 49 50 /* create and initialize a linked list */ 51 nlnk = bn+1; 52 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 53 54 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 55 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 56 current_space = free_space; 57 58 /* Determine symbolic info for each row of the product: */ 59 for (i=0;i<am;i++) { 60 anzi = ai[i+1] - ai[i]; 61 cnzi = 0; 62 j = anzi; 63 aj = a->j + ai[i]; 64 while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 65 j--; 66 brow = *(aj + j); 67 bnzj = bi[brow+1] - bi[brow]; 68 bjj = bj + bi[brow]; 69 /* add non-zero cols of B into the sorted linked list lnk */ 70 ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 71 cnzi += nlnk; 72 } 73 74 /* If free space is not available, make more free space */ 75 /* Double the amount of total space in the list */ 76 if (current_space->local_remaining<cnzi) { 77 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 78 nspacedouble++; 79 } 80 81 /* Copy data into free space, then initialize lnk */ 82 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 83 current_space->array += cnzi; 84 current_space->local_used += cnzi; 85 current_space->local_remaining -= cnzi; 86 87 ci[i+1] = ci[i] + cnzi; 88 } 89 90 /* Column indices are in the list of free space */ 91 /* Allocate space for cj, initialize cj, and */ 92 /* destroy list of free space and other temporary array(s) */ 93 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 94 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 95 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 96 97 /* Allocate space for ca */ 98 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 99 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 100 101 /* put together the new symbolic matrix */ 102 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 103 104 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 105 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 106 c = (Mat_SeqAIJ *)((*C)->data); 107 c->free_a = PETSC_TRUE; 108 c->free_ij = PETSC_TRUE; 109 c->nonew = 0; 110 111 /* set MatInfo */ 112 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 113 if (afill < 1.0) afill = 1.0; 114 c->maxnz = ci[am]; 115 c->nz = ci[am]; 116 (*C)->info.mallocs = nspacedouble; 117 (*C)->info.fill_ratio_given = fill; 118 (*C)->info.fill_ratio_needed = afill; 119 120 #if defined(PETSC_USE_INFO) 121 if (ci[am]) { 122 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 123 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 124 } else { 125 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 126 } 127 #endif 128 PetscFunctionReturn(0); 129 } 130 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 134 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 135 { 136 PetscErrorCode ierr; 137 PetscLogDouble flops=0.0; 138 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 139 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 140 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 141 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 142 PetscInt am=A->rmap->N,cm=C->rmap->N; 143 PetscInt i,j,k,anzi,bnzi,cnzi,brow,nextb; 144 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a; 145 146 PetscFunctionBegin; 147 /* clean old values in C */ 148 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 149 /* Traverse A row-wise. */ 150 /* Build the ith row in C by summing over nonzero columns in A, */ 151 /* the rows of B corresponding to nonzeros of A. */ 152 for (i=0;i<am;i++) { 153 anzi = ai[i+1] - ai[i]; 154 for (j=0;j<anzi;j++) { 155 brow = *aj++; 156 bnzi = bi[brow+1] - bi[brow]; 157 bjj = bj + bi[brow]; 158 baj = ba + bi[brow]; 159 nextb = 0; 160 for (k=0; nextb<bnzi; k++) { 161 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 162 ca[k] += (*aa)*baj[nextb++]; 163 } 164 } 165 flops += 2*bnzi; 166 aa++; 167 } 168 cnzi = ci[i+1] - ci[i]; 169 ca += cnzi; 170 cj += cnzi; 171 } 172 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 173 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 174 175 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 182 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 183 PetscErrorCode ierr; 184 185 PetscFunctionBegin; 186 if (scall == MAT_INITIAL_MATRIX){ 187 ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 188 } 189 ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 195 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 196 { 197 PetscErrorCode ierr; 198 Mat At; 199 PetscInt *ati,*atj; 200 201 PetscFunctionBegin; 202 /* create symbolic At */ 203 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 204 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 205 206 /* get symbolic C=At*B */ 207 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 208 209 /* clean up */ 210 ierr = MatDestroy(At);CHKERRQ(ierr); 211 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 212 PetscFunctionReturn(0); 213 } 214 215 #undef __FUNCT__ 216 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 217 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 218 { 219 PetscErrorCode ierr; 220 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 221 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 222 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 223 PetscLogDouble flops=0.0; 224 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 225 226 PetscFunctionBegin; 227 /* clear old values in C */ 228 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 229 230 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 231 for (i=0;i<am;i++) { 232 bj = b->j + bi[i]; 233 ba = b->a + bi[i]; 234 bnzi = bi[i+1] - bi[i]; 235 anzi = ai[i+1] - ai[i]; 236 for (j=0; j<anzi; j++) { 237 nextb = 0; 238 crow = *aj++; 239 cjj = cj + ci[crow]; 240 caj = ca + ci[crow]; 241 /* perform sparse axpy operation. Note cjj includes bj. */ 242 for (k=0; nextb<bnzi; k++) { 243 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 244 caj[k] += (*aa)*(*(ba+nextb)); 245 nextb++; 246 } 247 } 248 flops += 2*bnzi; 249 aa++; 250 } 251 } 252 253 /* Assemble the final matrix and clean up */ 254 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 255 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 256 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 257 PetscFunctionReturn(0); 258 } 259 260 EXTERN_C_BEGIN 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 263 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 264 { 265 PetscErrorCode ierr; 266 267 PetscFunctionBegin; 268 if (scall == MAT_INITIAL_MATRIX){ 269 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 270 } 271 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 EXTERN_C_END 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 278 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 279 { 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 289 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 290 { 291 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 292 PetscErrorCode ierr; 293 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 294 MatScalar *aa; 295 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 296 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 297 298 PetscFunctionBegin; 299 if (!cm || !cn) PetscFunctionReturn(0); 300 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 301 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 302 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 303 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 304 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 305 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 306 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 307 colam = col*am; 308 for (i=0; i<am; i++) { /* over rows of C in those columns */ 309 r1 = r2 = r3 = r4 = 0.0; 310 n = a->i[i+1] - a->i[i]; 311 aj = a->j + a->i[i]; 312 aa = a->a + a->i[i]; 313 for (j=0; j<n; j++) { 314 r1 += (*aa)*b1[*aj]; 315 r2 += (*aa)*b2[*aj]; 316 r3 += (*aa)*b3[*aj]; 317 r4 += (*aa++)*b4[*aj++]; 318 } 319 c[colam + i] = r1; 320 c[colam + am + i] = r2; 321 c[colam + am2 + i] = r3; 322 c[colam + am3 + i] = r4; 323 } 324 b1 += bm4; 325 b2 += bm4; 326 b3 += bm4; 327 b4 += bm4; 328 } 329 for (;col<cn; col++){ /* over extra columns of C */ 330 for (i=0; i<am; i++) { /* over rows of C in those columns */ 331 r1 = 0.0; 332 n = a->i[i+1] - a->i[i]; 333 aj = a->j + a->i[i]; 334 aa = a->a + a->i[i]; 335 336 for (j=0; j<n; j++) { 337 r1 += (*aa++)*b1[*aj++]; 338 } 339 c[col*am + i] = r1; 340 } 341 b1 += bm; 342 } 343 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 344 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 345 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 346 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 353 */ 354 #undef __FUNCT__ 355 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 356 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 357 { 358 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 359 PetscErrorCode ierr; 360 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 361 MatScalar *aa; 362 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 363 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 364 365 PetscFunctionBegin; 366 if (!cm || !cn) PetscFunctionReturn(0); 367 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 368 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 369 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 370 371 if (a->compressedrow.use){ /* use compressed row format */ 372 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 373 colam = col*am; 374 arm = a->compressedrow.nrows; 375 ii = a->compressedrow.i; 376 ridx = a->compressedrow.rindex; 377 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 378 r1 = r2 = r3 = r4 = 0.0; 379 n = ii[i+1] - ii[i]; 380 aj = a->j + ii[i]; 381 aa = a->a + ii[i]; 382 for (j=0; j<n; j++) { 383 r1 += (*aa)*b1[*aj]; 384 r2 += (*aa)*b2[*aj]; 385 r3 += (*aa)*b3[*aj]; 386 r4 += (*aa++)*b4[*aj++]; 387 } 388 c[colam + ridx[i]] += r1; 389 c[colam + am + ridx[i]] += r2; 390 c[colam + am2 + ridx[i]] += r3; 391 c[colam + am3 + ridx[i]] += r4; 392 } 393 b1 += bm4; 394 b2 += bm4; 395 b3 += bm4; 396 b4 += bm4; 397 } 398 for (;col<cn; col++){ /* over extra columns of C */ 399 colam = col*am; 400 arm = a->compressedrow.nrows; 401 ii = a->compressedrow.i; 402 ridx = a->compressedrow.rindex; 403 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 404 r1 = 0.0; 405 n = ii[i+1] - ii[i]; 406 aj = a->j + ii[i]; 407 aa = a->a + ii[i]; 408 409 for (j=0; j<n; j++) { 410 r1 += (*aa++)*b1[*aj++]; 411 } 412 c[col*am + ridx[i]] += r1; 413 } 414 b1 += bm; 415 } 416 } else { 417 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 418 colam = col*am; 419 for (i=0; i<am; i++) { /* over rows of C in those columns */ 420 r1 = r2 = r3 = r4 = 0.0; 421 n = a->i[i+1] - a->i[i]; 422 aj = a->j + a->i[i]; 423 aa = a->a + a->i[i]; 424 for (j=0; j<n; j++) { 425 r1 += (*aa)*b1[*aj]; 426 r2 += (*aa)*b2[*aj]; 427 r3 += (*aa)*b3[*aj]; 428 r4 += (*aa++)*b4[*aj++]; 429 } 430 c[colam + i] += r1; 431 c[colam + am + i] += r2; 432 c[colam + am2 + i] += r3; 433 c[colam + am3 + i] += r4; 434 } 435 b1 += bm4; 436 b2 += bm4; 437 b3 += bm4; 438 b4 += bm4; 439 } 440 for (;col<cn; col++){ /* over extra columns of C */ 441 for (i=0; i<am; i++) { /* over rows of C in those columns */ 442 r1 = 0.0; 443 n = a->i[i+1] - a->i[i]; 444 aj = a->j + a->i[i]; 445 aa = a->a + a->i[i]; 446 447 for (j=0; j<n; j++) { 448 r1 += (*aa++)*b1[*aj++]; 449 } 450 c[col*am + i] += r1; 451 } 452 b1 += bm; 453 } 454 } 455 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 456 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 457 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460