1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatPtAP" 12 /*@ 13 MatPtAP - Creates the matrix projection C = P^T * A * P 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the matrix 19 . P - the projection matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 30 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 31 32 Level: intermediate 33 34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 35 @*/ 36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 37 PetscErrorCode ierr; 38 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 39 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 40 41 PetscFunctionBegin; 42 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 43 PetscValidType(A,1); 44 MatPreallocated(A); 45 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 46 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 47 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 48 PetscValidType(P,2); 49 MatPreallocated(P); 50 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 51 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 52 PetscValidPointer(C,3); 53 54 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 /* For now, we do not dispatch based on the type of A and P */ 59 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60 fA = A->ops->ptap; 61 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 62 fP = P->ops->ptap; 63 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 64 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 65 66 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 67 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 68 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*); 73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*); 74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat); 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 77 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 78 { 79 PetscErrorCode ierr; 80 Mat AP,AP_seq,P_seq,A_loc,C_seq; 81 /* Mat_MPIAIJ *p = (Mat_MPIAIJ*)P->data; */ 82 Mat_MatMatMultMPI *mult; 83 84 int prstart,prend,m=P->m; 85 int rank,prid=10; 86 IS isrow,iscol; 87 Mat P_subseq,*psubseq; 88 89 PetscFunctionBegin; 90 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 91 92 /* compute symbolic and numeric AP = A*P */ 93 ierr = MatMatMult_MPIAIJ_MPIAIJ(A,P,scall,fill,&AP);CHKERRQ(ierr); 94 mult = (Mat_MatMatMultMPI*)AP->spptr; 95 P_seq = mult->bseq[0]; /* = submatrix of P by taking rows of P that equal to nonzero col of A */ 96 A_loc = mult->aseq[0]; /* = submatrix of A by taking all local rows of A */ 97 AP_seq = mult->C_seq; 98 99 if (rank == prid){ 100 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_loc: %d, %d\n",rank,A_loc->m,A_loc->n);CHKERRQ(ierr); 101 ierr = MatView(A_loc,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 102 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] P_seq: %d, %d\n",rank,P_seq->m,P_seq->n);CHKERRQ(ierr); 103 ierr = MatView(P_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 104 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] AP_seq: %d, %d\n",rank,AP_seq->m,AP_seq->n); 105 ierr = MatView(AP_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 106 } 107 108 prstart = mult->brstart; 109 prend = prstart + m; 110 /* 111 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] prstart: %d, prend: %d, dim of P_seq: %d, %d\n",rank,prstart,prend,P_seq->m,P_seq->n); 112 */ 113 114 /* get seq matrix P_subseq by taking local rows of P */ 115 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 116 ierr = ISCreateStride(PETSC_COMM_SELF,P_seq->n,0,1,&iscol);CHKERRQ(ierr); 117 ierr = MatGetSubMatrices(P_seq,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psubseq);CHKERRQ(ierr); 118 P_subseq = psubseq[0]; 119 ierr = ISDestroy(isrow);CHKERRQ(ierr); 120 ierr = ISDestroy(iscol);CHKERRQ(ierr); 121 122 /* compute P_subseq^T*AP_seq */ 123 ierr = MatMatMultTranspose_SeqAIJ_SeqAIJ(P_subseq,AP_seq,scall,fill,&C_seq);CHKERRQ(ierr); 124 if (rank == prid){ 125 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] dim of P_subseq: %d, %d; AP_seq: %d, %d\n",rank,P_subseq->m,P_subseq->n,AP_seq->m,AP_seq->n); 126 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] C_seq: %d, %d\n",rank,C_seq->m,C_seq->n);CHKERRQ(ierr); 127 ierr = MatView(C_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 128 } 129 ierr = MatDestroyMatrices(1,&psubseq);CHKERRQ(ierr); 130 131 /* ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr); */ 132 /* ierr = PetscPrintf(PETSC_COMM_SELF," [%d] C_seq dim: %d, %d\n",rank,C_seq->m,C_seq->n); */ 133 134 /* add C_seq into C */ 135 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr); 136 137 /* clean up */ 138 ierr = MatDestroy(AP);CHKERRQ(ierr); 139 ierr = MatDestroy(C_seq);CHKERRQ(ierr); 140 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 146 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 147 { 148 149 PetscFunctionBegin; 150 PetscFunctionReturn(0); 151 } 152 153 #undef __FUNCT__ 154 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 155 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 156 { 157 158 PetscFunctionBegin; 159 PetscFunctionReturn(0); 160 } 161 162 #undef __FUNCT__ 163 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 164 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 165 { 166 PetscErrorCode ierr; 167 PetscFunctionBegin; 168 if (scall == MAT_INITIAL_MATRIX){ 169 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 170 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 171 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 172 } 173 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 174 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 175 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatPtAPSymbolic" 181 /* 182 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 183 184 Collective on Mat 185 186 Input Parameters: 187 + A - the matrix 188 - P - the projection matrix 189 190 Output Parameters: 191 . C - the (i,j) structure of the product matrix 192 193 Notes: 194 C will be created and must be destroyed by the user with MatDestroy(). 195 196 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 197 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 198 this (i,j) structure by calling MatPtAPNumeric(). 199 200 Level: intermediate 201 202 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 203 */ 204 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 205 PetscErrorCode ierr; 206 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 207 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 208 209 PetscFunctionBegin; 210 211 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 212 PetscValidType(A,1); 213 MatPreallocated(A); 214 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 215 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 216 217 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 218 PetscValidType(P,2); 219 MatPreallocated(P); 220 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 221 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 222 223 PetscValidPointer(C,3); 224 225 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 226 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 227 228 /* For now, we do not dispatch based on the type of A and P */ 229 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 230 fA = A->ops->ptapsymbolic; 231 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 232 fP = P->ops->ptapsymbolic; 233 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 234 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 235 236 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 237 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 238 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 239 240 PetscFunctionReturn(0); 241 } 242 243 typedef struct { 244 Mat symAP; 245 } Mat_PtAPstruct; 246 247 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 251 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 252 { 253 PetscErrorCode ierr; 254 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 255 256 PetscFunctionBegin; 257 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 258 ierr = PetscFree(ptap);CHKERRQ(ierr); 259 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 265 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 266 PetscErrorCode ierr; 267 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 268 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 269 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 270 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 271 int an=A->N,am=A->M,pn=P->N,pm=P->M; 272 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 273 MatScalar *ca; 274 275 PetscFunctionBegin; 276 /* Get ij structure of P^T */ 277 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 278 ptJ=ptj; 279 280 /* Allocate ci array, arrays for fill computation and */ 281 /* free space for accumulating nonzero column info */ 282 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 283 ci[0] = 0; 284 285 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 286 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 287 ptasparserow = ptadenserow + an; 288 denserow = ptasparserow + an; 289 sparserow = denserow + pn; 290 291 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 292 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 293 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 294 current_space = free_space; 295 296 /* Determine symbolic info for each row of C: */ 297 for (i=0;i<pn;i++) { 298 ptnzi = pti[i+1] - pti[i]; 299 ptanzi = 0; 300 /* Determine symbolic row of PtA: */ 301 for (j=0;j<ptnzi;j++) { 302 arow = *ptJ++; 303 anzj = ai[arow+1] - ai[arow]; 304 ajj = aj + ai[arow]; 305 for (k=0;k<anzj;k++) { 306 if (!ptadenserow[ajj[k]]) { 307 ptadenserow[ajj[k]] = -1; 308 ptasparserow[ptanzi++] = ajj[k]; 309 } 310 } 311 } 312 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 313 ptaj = ptasparserow; 314 cnzi = 0; 315 for (j=0;j<ptanzi;j++) { 316 prow = *ptaj++; 317 pnzj = pi[prow+1] - pi[prow]; 318 pjj = pj + pi[prow]; 319 for (k=0;k<pnzj;k++) { 320 if (!denserow[pjj[k]]) { 321 denserow[pjj[k]] = -1; 322 sparserow[cnzi++] = pjj[k]; 323 } 324 } 325 } 326 327 /* sort sparserow */ 328 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 329 330 /* If free space is not available, make more free space */ 331 /* Double the amount of total space in the list */ 332 if (current_space->local_remaining<cnzi) { 333 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 334 } 335 336 /* Copy data into free space, and zero out denserows */ 337 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 338 current_space->array += cnzi; 339 current_space->local_used += cnzi; 340 current_space->local_remaining -= cnzi; 341 342 for (j=0;j<ptanzi;j++) { 343 ptadenserow[ptasparserow[j]] = 0; 344 } 345 for (j=0;j<cnzi;j++) { 346 denserow[sparserow[j]] = 0; 347 } 348 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 349 /* For now, we will recompute what is needed. */ 350 ci[i+1] = ci[i] + cnzi; 351 } 352 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 353 /* Allocate space for cj, initialize cj, and */ 354 /* destroy list of free space and other temporary array(s) */ 355 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 356 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 357 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 358 359 /* Allocate space for ca */ 360 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 361 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 362 363 /* put together the new matrix */ 364 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 365 366 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 367 /* Since these are PETSc arrays, change flags to free them as necessary. */ 368 c = (Mat_SeqAIJ *)((*C)->data); 369 c->freedata = PETSC_TRUE; 370 c->nonew = 0; 371 372 /* Clean up. */ 373 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 374 375 PetscFunctionReturn(0); 376 } 377 378 #include "src/mat/impls/maij/maij.h" 379 EXTERN_C_BEGIN 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 382 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 383 /* This routine requires testing -- I don't think it works. */ 384 PetscErrorCode ierr; 385 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 386 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 387 Mat P=pp->AIJ; 388 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 389 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 390 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 391 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 392 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 393 MatScalar *ca; 394 395 PetscFunctionBegin; 396 /* Start timer */ 397 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 398 399 /* Get ij structure of P^T */ 400 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 401 402 /* Allocate ci array, arrays for fill computation and */ 403 /* free space for accumulating nonzero column info */ 404 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 405 ci[0] = 0; 406 407 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 408 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 409 ptasparserow = ptadenserow + an; 410 denserow = ptasparserow + an; 411 sparserow = denserow + pn; 412 413 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 414 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 415 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 416 current_space = free_space; 417 418 /* Determine symbolic info for each row of C: */ 419 for (i=0;i<pn/ppdof;i++) { 420 ptnzi = pti[i+1] - pti[i]; 421 ptanzi = 0; 422 ptJ = ptj + pti[i]; 423 for (dof=0;dof<ppdof;dof++) { 424 /* Determine symbolic row of PtA: */ 425 for (j=0;j<ptnzi;j++) { 426 arow = ptJ[j] + dof; 427 anzj = ai[arow+1] - ai[arow]; 428 ajj = aj + ai[arow]; 429 for (k=0;k<anzj;k++) { 430 if (!ptadenserow[ajj[k]]) { 431 ptadenserow[ajj[k]] = -1; 432 ptasparserow[ptanzi++] = ajj[k]; 433 } 434 } 435 } 436 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 437 ptaj = ptasparserow; 438 cnzi = 0; 439 for (j=0;j<ptanzi;j++) { 440 pdof = *ptaj%dof; 441 prow = (*ptaj++)/dof; 442 pnzj = pi[prow+1] - pi[prow]; 443 pjj = pj + pi[prow]; 444 for (k=0;k<pnzj;k++) { 445 if (!denserow[pjj[k]+pdof]) { 446 denserow[pjj[k]+pdof] = -1; 447 sparserow[cnzi++] = pjj[k]+pdof; 448 } 449 } 450 } 451 452 /* sort sparserow */ 453 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 454 455 /* If free space is not available, make more free space */ 456 /* Double the amount of total space in the list */ 457 if (current_space->local_remaining<cnzi) { 458 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 459 } 460 461 /* Copy data into free space, and zero out denserows */ 462 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 463 current_space->array += cnzi; 464 current_space->local_used += cnzi; 465 current_space->local_remaining -= cnzi; 466 467 for (j=0;j<ptanzi;j++) { 468 ptadenserow[ptasparserow[j]] = 0; 469 } 470 for (j=0;j<cnzi;j++) { 471 denserow[sparserow[j]] = 0; 472 } 473 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 474 /* For now, we will recompute what is needed. */ 475 ci[i+1+dof] = ci[i+dof] + cnzi; 476 } 477 } 478 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 479 /* Allocate space for cj, initialize cj, and */ 480 /* destroy list of free space and other temporary array(s) */ 481 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 482 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 483 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 484 485 /* Allocate space for ca */ 486 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 487 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 488 489 /* put together the new matrix */ 490 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 491 492 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 493 /* Since these are PETSc arrays, change flags to free them as necessary. */ 494 c = (Mat_SeqAIJ *)((*C)->data); 495 c->freedata = PETSC_TRUE; 496 c->nonew = 0; 497 498 /* Clean up. */ 499 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 500 501 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 502 PetscFunctionReturn(0); 503 } 504 EXTERN_C_END 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatPtAPNumeric" 508 /* 509 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 510 511 Collective on Mat 512 513 Input Parameters: 514 + A - the matrix 515 - P - the projection matrix 516 517 Output Parameters: 518 . C - the product matrix 519 520 Notes: 521 C must have been created by calling MatPtAPSymbolic and must be destroyed by 522 the user using MatDeatroy(). 523 524 This routine is currently only implemented for pairs of AIJ matrices and classes 525 which inherit from AIJ. C will be of type MATAIJ. 526 527 Level: intermediate 528 529 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 530 */ 531 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 532 PetscErrorCode ierr; 533 PetscErrorCode (*fA)(Mat,Mat,Mat); 534 PetscErrorCode (*fP)(Mat,Mat,Mat); 535 536 PetscFunctionBegin; 537 538 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 539 PetscValidType(A,1); 540 MatPreallocated(A); 541 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 542 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 543 544 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 545 PetscValidType(P,2); 546 MatPreallocated(P); 547 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 548 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549 550 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 551 PetscValidType(C,3); 552 MatPreallocated(C); 553 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 554 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 555 556 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 557 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 558 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 559 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 560 561 /* For now, we do not dispatch based on the type of A and P */ 562 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 563 fA = A->ops->ptapnumeric; 564 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 565 fP = P->ops->ptapnumeric; 566 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 567 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 568 569 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 570 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 571 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 572 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 578 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 579 { 580 PetscErrorCode ierr; 581 int flops=0; 582 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 583 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 584 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 585 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 586 int *ci=c->i,*cj=c->j,*cjj; 587 int am=A->M,cn=C->N,cm=C->M; 588 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 589 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 590 591 PetscFunctionBegin; 592 /* Allocate temporary array for storage of one row of A*P */ 593 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 594 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 595 596 apj = (int *)(apa + cn); 597 apjdense = apj + cn; 598 599 /* Clear old values in C */ 600 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 601 602 for (i=0;i<am;i++) { 603 /* Form sparse row of A*P */ 604 anzi = ai[i+1] - ai[i]; 605 apnzj = 0; 606 for (j=0;j<anzi;j++) { 607 prow = *aj++; 608 pnzj = pi[prow+1] - pi[prow]; 609 pjj = pj + pi[prow]; 610 paj = pa + pi[prow]; 611 for (k=0;k<pnzj;k++) { 612 if (!apjdense[pjj[k]]) { 613 apjdense[pjj[k]] = -1; 614 apj[apnzj++] = pjj[k]; 615 } 616 apa[pjj[k]] += (*aa)*paj[k]; 617 } 618 flops += 2*pnzj; 619 aa++; 620 } 621 622 /* Sort the j index array for quick sparse axpy. */ 623 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 624 625 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 626 pnzi = pi[i+1] - pi[i]; 627 for (j=0;j<pnzi;j++) { 628 nextap = 0; 629 crow = *pJ++; 630 cjj = cj + ci[crow]; 631 caj = ca + ci[crow]; 632 /* Perform sparse axpy operation. Note cjj includes apj. */ 633 for (k=0;nextap<apnzj;k++) { 634 if (cjj[k]==apj[nextap]) { 635 caj[k] += (*pA)*apa[apj[nextap++]]; 636 } 637 } 638 flops += 2*apnzj; 639 pA++; 640 } 641 642 /* Zero the current row info for A*P */ 643 for (j=0;j<apnzj;j++) { 644 apa[apj[j]] = 0.; 645 apjdense[apj[j]] = 0; 646 } 647 } 648 649 /* Assemble the final matrix and clean up */ 650 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 651 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 652 ierr = PetscFree(apa);CHKERRQ(ierr); 653 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 654 655 PetscFunctionReturn(0); 656 } 657 658 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 659 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 662 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C) 663 { 664 PetscErrorCode ierr; 665 int flops=0; 666 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 667 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 668 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 669 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 670 int *ci=c->i,*cj=c->j,*cjj; 671 int am=A->M,cn=C->N,cm=C->M; 672 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 673 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 674 675 PetscFunctionBegin; 676 pA=p->a+pi[prstart]; 677 /* Allocate temporary array for storage of one row of A*P */ 678 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 679 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 680 681 apj = (int *)(apa + cn); 682 apjdense = apj + cn; 683 684 /* Clear old values in C */ 685 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 686 687 for (i=0;i<am;i++) { 688 /* Form sparse row of A*P */ 689 anzi = ai[i+1] - ai[i]; 690 apnzj = 0; 691 for (j=0;j<anzi;j++) { 692 prow = *aj++; 693 pnzj = pi[prow+1] - pi[prow]; 694 pjj = pj + pi[prow]; 695 paj = pa + pi[prow]; 696 for (k=0;k<pnzj;k++) { 697 if (!apjdense[pjj[k]]) { 698 apjdense[pjj[k]] = -1; 699 apj[apnzj++] = pjj[k]; 700 } 701 apa[pjj[k]] += (*aa)*paj[k]; 702 } 703 flops += 2*pnzj; 704 aa++; 705 } 706 707 /* Sort the j index array for quick sparse axpy. */ 708 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 709 710 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 711 pnzi = pi[i+1+prstart] - pi[i+prstart]; 712 for (j=0;j<pnzi;j++) { 713 nextap = 0; 714 crow = *pJ++; 715 cjj = cj + ci[crow]; 716 caj = ca + ci[crow]; 717 /* Perform sparse axpy operation. Note cjj includes apj. */ 718 for (k=0;nextap<apnzj;k++) { 719 if (cjj[k]==apj[nextap]) { 720 caj[k] += (*pA)*apa[apj[nextap++]]; 721 } 722 } 723 flops += 2*apnzj; 724 pA++; 725 } 726 727 /* Zero the current row info for A*P */ 728 for (j=0;j<apnzj;j++) { 729 apa[apj[j]] = 0.; 730 apjdense[apj[j]] = 0; 731 } 732 } 733 734 /* Assemble the final matrix and clean up */ 735 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 736 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 737 ierr = PetscFree(apa);CHKERRQ(ierr); 738 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 739 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 745 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) { 746 PetscErrorCode ierr; 747 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 748 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 749 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 750 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 751 int an=A->N,am=A->M,pn=P->N,pm=P->M; 752 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 753 MatScalar *ca; 754 Mat *psub,P_sub; 755 IS isrow,iscol; 756 int m = prend - prstart; 757 758 PetscFunctionBegin; 759 /* Get ij structure of P[rstart:rend,:]^T */ 760 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 761 ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 762 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 763 ierr = ISDestroy(isrow);CHKERRQ(ierr); 764 ierr = ISDestroy(iscol);CHKERRQ(ierr); 765 P_sub = psub[0]; 766 ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 767 ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 768 ptJ=ptj; 769 770 /* Allocate ci array, arrays for fill computation and */ 771 /* free space for accumulating nonzero column info */ 772 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 773 ci[0] = 0; 774 775 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 776 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 777 ptasparserow = ptadenserow + an; 778 denserow = ptasparserow + an; 779 sparserow = denserow + pn; 780 781 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 782 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 783 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 784 current_space = free_space; 785 786 /* Determine symbolic info for each row of C: */ 787 for (i=0;i<pn;i++) { 788 ptnzi = pti[i+1] - pti[i]; 789 ptanzi = 0; 790 /* Determine symbolic row of PtA_reduced: */ 791 for (j=0;j<ptnzi;j++) { 792 arow = *ptJ++; 793 anzj = ai[arow+1] - ai[arow]; 794 ajj = aj + ai[arow]; 795 for (k=0;k<anzj;k++) { 796 if (!ptadenserow[ajj[k]]) { 797 ptadenserow[ajj[k]] = -1; 798 ptasparserow[ptanzi++] = ajj[k]; 799 } 800 } 801 } 802 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 803 ptaj = ptasparserow; 804 cnzi = 0; 805 for (j=0;j<ptanzi;j++) { 806 prow = *ptaj++; 807 pnzj = pi[prow+1] - pi[prow]; 808 pjj = pj + pi[prow]; 809 for (k=0;k<pnzj;k++) { 810 if (!denserow[pjj[k]]) { 811 denserow[pjj[k]] = -1; 812 sparserow[cnzi++] = pjj[k]; 813 } 814 } 815 } 816 817 /* sort sparserow */ 818 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 819 820 /* If free space is not available, make more free space */ 821 /* Double the amount of total space in the list */ 822 if (current_space->local_remaining<cnzi) { 823 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 824 } 825 826 /* Copy data into free space, and zero out denserows */ 827 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 828 current_space->array += cnzi; 829 current_space->local_used += cnzi; 830 current_space->local_remaining -= cnzi; 831 832 for (j=0;j<ptanzi;j++) { 833 ptadenserow[ptasparserow[j]] = 0; 834 } 835 for (j=0;j<cnzi;j++) { 836 denserow[sparserow[j]] = 0; 837 } 838 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 839 /* For now, we will recompute what is needed. */ 840 ci[i+1] = ci[i] + cnzi; 841 } 842 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 843 /* Allocate space for cj, initialize cj, and */ 844 /* destroy list of free space and other temporary array(s) */ 845 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 846 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 847 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 848 849 /* Allocate space for ca */ 850 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 851 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 852 853 /* put together the new matrix */ 854 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 855 856 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 857 /* Since these are PETSc arrays, change flags to free them as necessary. */ 858 c = (Mat_SeqAIJ *)((*C)->data); 859 c->freedata = PETSC_TRUE; 860 c->nonew = 0; 861 862 /* Clean up. */ 863 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 864 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 870 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C) 871 { 872 PetscErrorCode ierr; 873 PetscFunctionBegin; 874 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 875 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 876 if (scall == MAT_INITIAL_MATRIX){ 877 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 878 } 879 880 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 881 882 PetscFunctionReturn(0); 883 } 884 885