1 /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/ 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 C = P * A * P^T 6 */ 7 8 #include "src/mat/impls/aij/seq/aij.h" 9 #include "src/mat/utils/freespace.h" 10 11 static int logkey_matmatmult = 0; 12 static int logkey_matmatmult_symbolic = 0; 13 static int logkey_matmatmult_numeric = 0; 14 15 static int logkey_matapplypapt = 0; 16 static int logkey_matapplypapt_symbolic = 0; 17 static int logkey_matapplypapt_numeric = 0; 18 19 /* 20 MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 21 C = A * B; 22 23 Note: C is assumed to be uncreated. 24 If this is not the case, Destroy C before calling this routine. 25 */ 26 #ifdef USE_INTSORT 27 /* 28 This roution is modified by the one below for better performance. 29 The changes are: 30 -- PetscSortInt() is replace by a linked list 31 -- malloc larger Initial FreeSpace 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 35 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 36 { 37 int ierr; 38 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 39 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 40 int aishift=a->indexshift,bishift=b->indexshift; 41 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 42 int *ci,*cj,*denserow,*sparserow; 43 int an=A->N,am=A->M,bn=B->N,bm=B->M; 44 int i,j,k,anzi,brow,bnzj,cnzi; 45 MatScalar *ca; 46 47 PetscFunctionBegin; 48 /* some error checking which could be moved into interface layer */ 49 if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 50 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 51 52 /* Set up timers */ 53 if (!logkey_matmatmult_symbolic) { 54 ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 55 } 56 ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 57 58 /* Set up */ 59 /* Allocate ci array, arrays for fill computation and */ 60 /* free space for accumulating nonzero column info */ 61 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 62 ci[0] = 0; 63 64 ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr); 65 ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr); 66 sparserow = denserow + bn; 67 68 /* Initial FreeSpace size is nnz(B)=bi[bm] */ 69 ierr = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr); 70 current_space = free_space; 71 72 /* Determine symbolic info for each row of the product: */ 73 for (i=0;i<am;i++) { 74 anzi = ai[i+1] - ai[i]; 75 cnzi = 0; 76 for (j=0;j<anzi;j++) { 77 brow = *aj++; 78 bnzj = bi[brow+1] - bi[brow]; 79 bjj = bj + bi[brow]; 80 for (k=0;k<bnzj;k++) { 81 /* If column is not marked, mark it in compressed and uncompressed locations. */ 82 /* For simplicity, leave uncompressed row unsorted until finished with row, */ 83 /* and increment nonzero count for this row. */ 84 if (!denserow[bjj[k]]) { 85 denserow[bjj[k]] = -1; 86 sparserow[cnzi++] = bjj[k]; 87 } 88 } 89 } 90 91 /* sort sparserow */ 92 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 93 94 /* If free space is not available, make more free space */ 95 /* Double the amount of total space in the list */ 96 if (current_space->local_remaining<cnzi) { 97 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 98 } 99 100 /* Copy data into free space, and zero out denserow */ 101 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 102 current_space->array += cnzi; 103 current_space->local_used += cnzi; 104 current_space->local_remaining -= cnzi; 105 for (j=0;j<cnzi;j++) { 106 denserow[sparserow[j]] = 0; 107 } 108 ci[i+1] = ci[i] + cnzi; 109 } 110 111 /* Column indices are in the list of free space */ 112 /* Allocate space for cj, initialize cj, and */ 113 /* destroy list of free space and other temporary array(s) */ 114 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 115 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 116 ierr = PetscFree(denserow);CHKERRQ(ierr); 117 118 /* Allocate space for ca */ 119 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 120 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 121 122 /* put together the new matrix */ 123 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 124 125 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 126 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 127 c = (Mat_SeqAIJ *)((*C)->data); 128 c->freedata = PETSC_TRUE; 129 c->nonew = 0; 130 131 ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 #endif /* USE_INTSORT */ 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 138 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 139 { 140 int ierr; 141 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 142 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 143 int aishift=a->indexshift,bishift=b->indexshift; 144 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 145 int *ci,*cj,*lnk,idx0,idx,bcol; 146 int an=A->N,am=A->M,bn=B->N,bm=B->M; 147 int i,j,k,anzi,brow,bnzj,cnzi; 148 MatScalar *ca; 149 PetscLogDouble t0,t1,etime=0.0; 150 151 PetscFunctionBegin; 152 /* some error checking which could be moved into interface layer */ 153 if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 154 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 155 156 /* Set up timers */ 157 if (!logkey_matmatmult_symbolic) { 158 ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 159 } 160 ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 161 162 /* Set up */ 163 /* Allocate ci array, arrays for fill computation and */ 164 /* free space for accumulating nonzero column info */ 165 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 166 ci[0] = 0; 167 168 ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 169 for (i=0; i<bn; i++) lnk[i] = -1; 170 171 /* Initial FreeSpace size is nnz(B)=4*bi[bm] */ 172 ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr); 173 current_space = free_space; 174 175 /* Determine symbolic info for each row of the product: */ 176 for (i=0;i<am;i++) { 177 anzi = ai[i+1] - ai[i]; 178 cnzi = 0; 179 lnk[bn] = bn; 180 for (j=0;j<anzi;j++) { 181 brow = *aj++; 182 bnzj = bi[brow+1] - bi[brow]; 183 bjj = bj + bi[brow]; 184 idx = bn; 185 for (k=0;k<bnzj;k++) { 186 bcol = bjj[k]; 187 if (lnk[bcol] == -1) { /* new col */ 188 if (k>0) idx = bjj[k-1]; 189 do { 190 idx0 = idx; 191 idx = lnk[idx0]; 192 } while (bcol > idx); 193 lnk[idx0] = bcol; 194 lnk[bcol] = idx; 195 cnzi++; 196 } 197 } 198 } 199 200 /* If free space is not available, make more free space */ 201 /* Double the amount of total space in the list */ 202 if (current_space->local_remaining<cnzi) { 203 printf("...%d -th row, double space ...\n",i); 204 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 205 } 206 207 /* Copy data into free space, and zero out denserow and lnk */ 208 idx = bn; 209 for (j=0; j<cnzi; j++){ 210 idx0 = idx; 211 idx = lnk[idx0]; 212 *current_space->array++ = idx; 213 lnk[idx0] = -1; 214 } 215 lnk[idx] = -1; 216 217 current_space->local_used += cnzi; 218 current_space->local_remaining -= cnzi; 219 220 ci[i+1] = ci[i] + cnzi; 221 } 222 223 /* Column indices are in the list of free space */ 224 /* Allocate space for cj, initialize cj, and */ 225 /* destroy list of free space and other temporary array(s) */ 226 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 227 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 228 ierr = PetscFree(lnk);CHKERRQ(ierr); 229 230 /* Allocate space for ca */ 231 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 232 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 233 234 /* put together the new matrix */ 235 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 236 237 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 238 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 239 c = (Mat_SeqAIJ *)((*C)->data); 240 c->freedata = PETSC_TRUE; 241 c->nonew = 0; 242 243 ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 /* 248 MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 249 C=A*B; 250 Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ. 251 */ 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 254 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 255 { 256 int ierr,flops=0; 257 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 258 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 259 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 260 int aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift; 261 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 262 int an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M; 263 int i,j,k,anzi,bnzi,cnzi,brow; 264 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 265 266 PetscFunctionBegin; 267 268 /* This error checking should be unnecessary if the symbolic was performed */ 269 if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 270 if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm); 271 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 272 if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn); 273 274 /* Set up timers */ 275 if (!logkey_matmatmult_numeric) { 276 ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 277 } 278 ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 279 280 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 281 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 282 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 283 /* Traverse A row-wise. */ 284 /* Build the ith row in C by summing over nonzero columns in A, */ 285 /* the rows of B corresponding to nonzeros of A. */ 286 for (i=0;i<am;i++) { 287 anzi = ai[i+1] - ai[i]; 288 for (j=0;j<anzi;j++) { 289 brow = *aj++; 290 bnzi = bi[brow+1] - bi[brow]; 291 bjj = bj + bi[brow]; 292 baj = ba + bi[brow]; 293 for (k=0;k<bnzi;k++) { 294 temp[bjj[k]] += (*aa)*baj[k]; 295 } 296 flops += 2*bnzi; 297 aa++; 298 } 299 /* Store row back into C, and re-zero temp */ 300 cnzi = ci[i+1] - ci[i]; 301 for (j=0;j<cnzi;j++) { 302 ca[j] = temp[cj[j]]; 303 temp[cj[j]] = 0.0; 304 } 305 ca += cnzi; 306 cj += cnzi; 307 } 308 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 309 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 310 311 /* Free temp */ 312 ierr = PetscFree(temp);CHKERRQ(ierr); 313 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 314 ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 320 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) { 321 int ierr; 322 323 PetscFunctionBegin; 324 if (!logkey_matmatmult) { 325 ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 326 } 327 ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 328 ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr); 329 ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 330 ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 331 PetscFunctionReturn(0); 332 } 333 334 335 /* 336 MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 337 C = P * A * P^T; 338 339 Note: C is assumed to be uncreated. 340 If this is not the case, Destroy C before calling this routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ" 344 int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 345 /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ 346 /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ 347 int ierr; 348 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 350 int aishift=a->indexshift,pishift=p->indexshift; 351 int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; 352 int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; 353 int an=A->N,am=A->M,pn=P->N,pm=P->M; 354 int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; 355 MatScalar *ca; 356 357 PetscFunctionBegin; 358 359 /* some error checking which could be moved into interface layer */ 360 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 361 if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 362 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 363 364 /* Set up timers */ 365 if (!logkey_matapplypapt_symbolic) { 366 ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 367 } 368 ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 369 370 /* Create ij structure of P^T */ 371 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 372 373 /* Allocate ci array, arrays for fill computation and */ 374 /* free space for accumulating nonzero column info */ 375 ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 376 ci[0] = 0; 377 378 ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr); 379 ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr); 380 pasparserow = padenserow + an; 381 denserow = pasparserow + an; 382 sparserow = denserow + pm; 383 384 /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ 385 /* This should be reasonable if sparsity of PAPt is similar to that of A. */ 386 ierr = GetMoreSpace((ai[am]/pn)*pm,&free_space); 387 current_space = free_space; 388 389 /* Determine fill for each row of C: */ 390 for (i=0;i<pm;i++) { 391 pnzi = pi[i+1] - pi[i]; 392 panzi = 0; 393 /* Get symbolic sparse row of PA: */ 394 for (j=0;j<pnzi;j++) { 395 arow = *pj++; 396 anzj = ai[arow+1] - ai[arow]; 397 ajj = aj + ai[arow]; 398 for (k=0;k<anzj;k++) { 399 if (!padenserow[ajj[k]]) { 400 padenserow[ajj[k]] = -1; 401 pasparserow[panzi++] = ajj[k]; 402 } 403 } 404 } 405 /* Using symbolic row of PA, determine symbolic row of C: */ 406 paj = pasparserow; 407 cnzi = 0; 408 for (j=0;j<panzi;j++) { 409 ptrow = *paj++; 410 ptnzj = pti[ptrow+1] - pti[ptrow]; 411 ptjj = ptj + pti[ptrow]; 412 for (k=0;k<ptnzj;k++) { 413 if (!denserow[ptjj[k]]) { 414 denserow[ptjj[k]] = -1; 415 sparserow[cnzi++] = ptjj[k]; 416 } 417 } 418 } 419 420 /* sort sparse representation */ 421 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 422 423 /* If free space is not available, make more free space */ 424 /* Double the amount of total space in the list */ 425 if (current_space->local_remaining<cnzi) { 426 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 427 } 428 429 /* Copy data into free space, and zero out dense row */ 430 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 431 current_space->array += cnzi; 432 current_space->local_used += cnzi; 433 current_space->local_remaining -= cnzi; 434 435 for (j=0;j<panzi;j++) { 436 padenserow[pasparserow[j]] = 0; 437 } 438 for (j=0;j<cnzi;j++) { 439 denserow[sparserow[j]] = 0; 440 } 441 ci[i+1] = ci[i] + cnzi; 442 } 443 /* column indices are in the list of free space */ 444 /* Allocate space for cj, initialize cj, and */ 445 /* destroy list of free space and other temporary array(s) */ 446 ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr); 447 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 448 ierr = PetscFree(padenserow);CHKERRQ(ierr); 449 450 /* Allocate space for ca */ 451 ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 452 ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); 453 454 /* put together the new matrix */ 455 ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); 456 457 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 458 /* Since these are PETSc arrays, change flags to free them as necessary. */ 459 c = (Mat_SeqAIJ *)((*C)->data); 460 c->freedata = PETSC_TRUE; 461 c->nonew = 0; 462 463 /* Clean up. */ 464 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 465 466 ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 /* 471 MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 472 C = P * A * P^T; 473 Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ. 474 */ 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ" 477 int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 478 int ierr,flops=0; 479 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 480 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 481 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 482 int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 483 int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj; 484 int *ci=c->i,*cj=c->j; 485 int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 486 int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi; 487 MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum; 488 489 PetscFunctionBegin; 490 491 /* This error checking should be unnecessary if the symbolic was performed */ 492 if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 493 if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm); 494 if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 495 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 496 if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn); 497 498 /* Set up timers */ 499 if (!logkey_matapplypapt_numeric) { 500 ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr); 501 } 502 ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 503 504 ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr); 505 ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 506 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 507 508 paj = (int *)(paa + an); 509 pajdense = paj + an; 510 511 for (i=0;i<pm;i++) { 512 /* Form sparse row of P*A */ 513 pnzi = pi[i+1] - pi[i]; 514 panzj = 0; 515 for (j=0;j<pnzi;j++) { 516 arow = *pj++; 517 anzj = ai[arow+1] - ai[arow]; 518 ajj = aj + ai[arow]; 519 aaj = aa + ai[arow]; 520 for (k=0;k<anzj;k++) { 521 if (!pajdense[ajj[k]]) { 522 pajdense[ajj[k]] = -1; 523 paj[panzj++] = ajj[k]; 524 } 525 paa[ajj[k]] += (*pa)*aaj[k]; 526 } 527 flops += 2*anzj; 528 pa++; 529 } 530 531 /* Sort the j index array for quick sparse axpy. */ 532 ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr); 533 534 /* Compute P*A*P^T using sparse inner products. */ 535 /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */ 536 cnzi = ci[i+1] - ci[i]; 537 for (j=0;j<cnzi;j++) { 538 /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */ 539 ptcol = *cj++; 540 ptnzj = pi[ptcol+1] - pi[ptcol]; 541 ptj = pjj + pi[ptcol]; 542 ptaj = pta + pi[ptcol]; 543 sum = 0.; 544 k1 = 0; 545 k2 = 0; 546 while ((k1<panzj) && (k2<ptnzj)) { 547 if (paj[k1]==ptj[k2]) { 548 sum += paa[paj[k1++]]*ptaj[k2++]; 549 } else if (paj[k1] < ptj[k2]) { 550 k1++; 551 } else /* if (paj[k1] > ptj[k2]) */ { 552 k2++; 553 } 554 } 555 *ca++ = sum; 556 } 557 558 /* Zero the current row info for P*A */ 559 for (j=0;j<panzj;j++) { 560 paa[paj[j]] = 0.; 561 pajdense[paj[j]] = 0; 562 } 563 } 564 565 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 567 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 568 ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ" 574 int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 575 int ierr; 576 577 PetscFunctionBegin; 578 if (!logkey_matapplypapt) { 579 ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr); 580 } 581 ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 582 ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 583 ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 584 ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587