1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatPtAP" 13 /*@ 14 MatPtAP - Creates the matrix projection C = P^T * A * P 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the matrix 20 . P - the projection matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 31 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 32 33 Level: intermediate 34 35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 36 @*/ 37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 50 PetscValidType(P,2); 51 MatPreallocated(P); 52 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 56 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57 58 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59 60 /* For now, we do not dispatch based on the type of A and P */ 61 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62 fA = A->ops->ptap; 63 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64 fP = P->ops->ptap; 65 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67 68 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 70 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76 77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_PtAPMPI *ptap; 84 PetscObjectContainer container; 85 86 PetscFunctionBegin; 87 ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88 if (container) { 89 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 90 91 ierr = MatDestroyMatrices(1,&ptap->ploc);CHKERRQ(ierr); 92 ierr = MatDestroy(ptap->P_oth);CHKERRQ(ierr); 93 94 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 95 ierr = PetscObjectCompose((PetscObject)A,"MatPtApMPI",0);CHKERRQ(ierr); 96 } 97 ierr = PetscFree(ptap);CHKERRQ(ierr); 98 99 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 105 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 106 { 107 PetscErrorCode ierr; 108 Mat P_loc,P_oth,*ploc; 109 PetscInt prstart,low; 110 IS isrow,iscol; 111 Mat_PtAPMPI *ptap; 112 PetscObjectContainer container; 113 114 PetscFunctionBegin; 115 if (scall == MAT_INITIAL_MATRIX){ 116 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 117 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 118 ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr); 119 120 /* get P_loc by taking all local rows of P */ 121 ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr); 122 ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr); 123 ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr); 124 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr); 125 P_loc = ploc[0]; 126 ierr = ISDestroy(isrow);CHKERRQ(ierr); 127 ierr = ISDestroy(iscol);CHKERRQ(ierr); 128 129 /* attach the supporting struct to P for reuse */ 130 ierr = PetscNew(Mat_PtAPMPI,&ptap);CHKERRQ(ierr); 131 ptap->ploc = ploc; 132 ptap->P_oth = P_oth; 133 ptap->prstart = prstart; 134 P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 135 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 136 ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 137 ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 138 139 /* now, compute symbolic product */ 140 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 141 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 142 } else if (scall == MAT_REUSE_MATRIX){ 143 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 144 if (container) { 145 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 146 } else { 147 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 148 } 149 150 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 151 /* improve! */ 152 ierr = MatDestroy(ptap->P_oth);CHKERRQ(ierr); 153 ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&ptap->P_oth);CHKERRQ(ierr); 154 155 /* get P_loc by taking all local rows of P */ 156 ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr); 157 ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr); 158 ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr); 159 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_REUSE_MATRIX,&ptap->ploc);CHKERRQ(ierr); 160 ierr = ISDestroy(isrow);CHKERRQ(ierr); 161 ierr = ISDestroy(iscol);CHKERRQ(ierr); 162 } else { 163 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 164 } 165 /* now, compute numeric product */ 166 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 167 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 168 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 169 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 175 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 176 { 177 PetscErrorCode ierr; 178 Mat C_seq; 179 Mat_PtAPMPI *ptap; 180 PetscObjectContainer container; 181 182 PetscFunctionBegin; 183 184 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 185 if (container) { 186 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 187 } else { 188 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 189 } 190 /* compute C_seq = P_loc^T * A_loc * P_seq */ 191 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->ploc[0],ptap->P_oth,ptap->prstart,fill,&C_seq);CHKERRQ(ierr); 192 193 /* add C_seq into mpi C */ 194 ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 195 196 PetscFunctionReturn(0); 197 } 198 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 201 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 202 { 203 PetscErrorCode ierr; 204 Mat_Merge_SeqsToMPI *merge; 205 Mat_PtAPMPI *ptap; 206 PetscObjectContainer cont_merge,cont_ptap; 207 208 PetscFunctionBegin; 209 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 210 if (cont_merge) { 211 ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 212 } else { 213 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 214 } 215 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 216 if (cont_ptap) { 217 ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 218 } else { 219 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 220 } 221 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->ploc[0],ptap->P_oth,ptap->prstart,merge->C_seq);CHKERRQ(ierr); 222 ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 228 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 229 { 230 PetscErrorCode ierr; 231 232 PetscFunctionBegin; 233 if (scall == MAT_INITIAL_MATRIX){ 234 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 235 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 236 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 237 } 238 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 239 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 240 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 241 PetscFunctionReturn(0); 242 } 243 244 /* 245 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 246 247 Collective on Mat 248 249 Input Parameters: 250 + A - the matrix 251 - P - the projection matrix 252 253 Output Parameters: 254 . C - the (i,j) structure of the product matrix 255 256 Notes: 257 C will be created and must be destroyed by the user with MatDestroy(). 258 259 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 260 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 261 this (i,j) structure by calling MatPtAPNumeric(). 262 263 Level: intermediate 264 265 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 266 */ 267 #undef __FUNCT__ 268 #define __FUNCT__ "MatPtAPSymbolic" 269 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 270 { 271 PetscErrorCode ierr; 272 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 273 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 274 275 PetscFunctionBegin; 276 277 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 278 PetscValidType(A,1); 279 MatPreallocated(A); 280 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 281 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 282 283 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 284 PetscValidType(P,2); 285 MatPreallocated(P); 286 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 287 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 288 289 PetscValidPointer(C,3); 290 291 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 292 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 293 294 /* For now, we do not dispatch based on the type of A and P */ 295 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 296 fA = A->ops->ptapsymbolic; 297 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 298 fP = P->ops->ptapsymbolic; 299 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 300 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 301 302 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 303 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 304 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 305 306 PetscFunctionReturn(0); 307 } 308 309 typedef struct { 310 Mat symAP; 311 } Mat_PtAPstruct; 312 313 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 317 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 318 { 319 PetscErrorCode ierr; 320 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 321 322 PetscFunctionBegin; 323 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 324 ierr = PetscFree(ptap);CHKERRQ(ierr); 325 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 331 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 332 { 333 PetscErrorCode ierr; 334 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 335 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 336 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 337 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 338 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 339 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 340 MatScalar *ca; 341 PetscBT lnkbt; 342 343 PetscFunctionBegin; 344 /* Get ij structure of P^T */ 345 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 346 ptJ=ptj; 347 348 /* Allocate ci array, arrays for fill computation and */ 349 /* free space for accumulating nonzero column info */ 350 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 351 ci[0] = 0; 352 353 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 354 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 355 ptasparserow = ptadenserow + an; 356 357 /* create and initialize a linked list */ 358 nlnk = pn+1; 359 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 360 361 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 362 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 363 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 364 current_space = free_space; 365 366 /* Determine symbolic info for each row of C: */ 367 for (i=0;i<pn;i++) { 368 ptnzi = pti[i+1] - pti[i]; 369 ptanzi = 0; 370 /* Determine symbolic row of PtA: */ 371 for (j=0;j<ptnzi;j++) { 372 arow = *ptJ++; 373 anzj = ai[arow+1] - ai[arow]; 374 ajj = aj + ai[arow]; 375 for (k=0;k<anzj;k++) { 376 if (!ptadenserow[ajj[k]]) { 377 ptadenserow[ajj[k]] = -1; 378 ptasparserow[ptanzi++] = ajj[k]; 379 } 380 } 381 } 382 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 383 ptaj = ptasparserow; 384 cnzi = 0; 385 for (j=0;j<ptanzi;j++) { 386 prow = *ptaj++; 387 pnzj = pi[prow+1] - pi[prow]; 388 pjj = pj + pi[prow]; 389 /* add non-zero cols of P into the sorted linked list lnk */ 390 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 391 cnzi += nlnk; 392 } 393 394 /* If free space is not available, make more free space */ 395 /* Double the amount of total space in the list */ 396 if (current_space->local_remaining<cnzi) { 397 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 398 } 399 400 /* Copy data into free space, and zero out denserows */ 401 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 402 current_space->array += cnzi; 403 current_space->local_used += cnzi; 404 current_space->local_remaining -= cnzi; 405 406 for (j=0;j<ptanzi;j++) { 407 ptadenserow[ptasparserow[j]] = 0; 408 } 409 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 410 /* For now, we will recompute what is needed. */ 411 ci[i+1] = ci[i] + cnzi; 412 } 413 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 414 /* Allocate space for cj, initialize cj, and */ 415 /* destroy list of free space and other temporary array(s) */ 416 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 417 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 418 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 419 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 420 421 /* Allocate space for ca */ 422 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 423 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 424 425 /* put together the new matrix */ 426 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 427 428 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 429 /* Since these are PETSc arrays, change flags to free them as necessary. */ 430 c = (Mat_SeqAIJ *)((*C)->data); 431 c->freedata = PETSC_TRUE; 432 c->nonew = 0; 433 434 /* Clean up. */ 435 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 436 437 PetscFunctionReturn(0); 438 } 439 440 #include "src/mat/impls/maij/maij.h" 441 EXTERN_C_BEGIN 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 444 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 445 { 446 /* This routine requires testing -- I don't think it works. */ 447 PetscErrorCode ierr; 448 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 449 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 450 Mat P=pp->AIJ; 451 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 452 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 453 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 454 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 455 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 456 MatScalar *ca; 457 458 PetscFunctionBegin; 459 /* Start timer */ 460 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 461 462 /* Get ij structure of P^T */ 463 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 464 465 /* Allocate ci array, arrays for fill computation and */ 466 /* free space for accumulating nonzero column info */ 467 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 468 ci[0] = 0; 469 470 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 471 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 472 ptasparserow = ptadenserow + an; 473 denserow = ptasparserow + an; 474 sparserow = denserow + pn; 475 476 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 477 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 478 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 479 current_space = free_space; 480 481 /* Determine symbolic info for each row of C: */ 482 for (i=0;i<pn/ppdof;i++) { 483 ptnzi = pti[i+1] - pti[i]; 484 ptanzi = 0; 485 ptJ = ptj + pti[i]; 486 for (dof=0;dof<ppdof;dof++) { 487 /* Determine symbolic row of PtA: */ 488 for (j=0;j<ptnzi;j++) { 489 arow = ptJ[j] + dof; 490 anzj = ai[arow+1] - ai[arow]; 491 ajj = aj + ai[arow]; 492 for (k=0;k<anzj;k++) { 493 if (!ptadenserow[ajj[k]]) { 494 ptadenserow[ajj[k]] = -1; 495 ptasparserow[ptanzi++] = ajj[k]; 496 } 497 } 498 } 499 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 500 ptaj = ptasparserow; 501 cnzi = 0; 502 for (j=0;j<ptanzi;j++) { 503 pdof = *ptaj%dof; 504 prow = (*ptaj++)/dof; 505 pnzj = pi[prow+1] - pi[prow]; 506 pjj = pj + pi[prow]; 507 for (k=0;k<pnzj;k++) { 508 if (!denserow[pjj[k]+pdof]) { 509 denserow[pjj[k]+pdof] = -1; 510 sparserow[cnzi++] = pjj[k]+pdof; 511 } 512 } 513 } 514 515 /* sort sparserow */ 516 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 517 518 /* If free space is not available, make more free space */ 519 /* Double the amount of total space in the list */ 520 if (current_space->local_remaining<cnzi) { 521 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 522 } 523 524 /* Copy data into free space, and zero out denserows */ 525 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 526 current_space->array += cnzi; 527 current_space->local_used += cnzi; 528 current_space->local_remaining -= cnzi; 529 530 for (j=0;j<ptanzi;j++) { 531 ptadenserow[ptasparserow[j]] = 0; 532 } 533 for (j=0;j<cnzi;j++) { 534 denserow[sparserow[j]] = 0; 535 } 536 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 537 /* For now, we will recompute what is needed. */ 538 ci[i+1+dof] = ci[i+dof] + cnzi; 539 } 540 } 541 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 542 /* Allocate space for cj, initialize cj, and */ 543 /* destroy list of free space and other temporary array(s) */ 544 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 545 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 546 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 547 548 /* Allocate space for ca */ 549 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 550 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 551 552 /* put together the new matrix */ 553 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 554 555 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 556 /* Since these are PETSc arrays, change flags to free them as necessary. */ 557 c = (Mat_SeqAIJ *)((*C)->data); 558 c->freedata = PETSC_TRUE; 559 c->nonew = 0; 560 561 /* Clean up. */ 562 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 563 564 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 565 PetscFunctionReturn(0); 566 } 567 EXTERN_C_END 568 569 /* 570 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 571 572 Collective on Mat 573 574 Input Parameters: 575 + A - the matrix 576 - P - the projection matrix 577 578 Output Parameters: 579 . C - the product matrix 580 581 Notes: 582 C must have been created by calling MatPtAPSymbolic and must be destroyed by 583 the user using MatDeatroy(). 584 585 This routine is currently only implemented for pairs of AIJ matrices and classes 586 which inherit from AIJ. C will be of type MATAIJ. 587 588 Level: intermediate 589 590 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 591 */ 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatPtAPNumeric" 594 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 595 { 596 PetscErrorCode ierr; 597 PetscErrorCode (*fA)(Mat,Mat,Mat); 598 PetscErrorCode (*fP)(Mat,Mat,Mat); 599 600 PetscFunctionBegin; 601 602 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 603 PetscValidType(A,1); 604 MatPreallocated(A); 605 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 606 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 607 608 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 609 PetscValidType(P,2); 610 MatPreallocated(P); 611 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 612 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 613 614 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 615 PetscValidType(C,3); 616 MatPreallocated(C); 617 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 618 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 619 620 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 621 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 622 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 623 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 624 625 /* For now, we do not dispatch based on the type of A and P */ 626 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 627 fA = A->ops->ptapnumeric; 628 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 629 fP = P->ops->ptapnumeric; 630 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 631 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 632 633 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 634 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 635 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 636 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 642 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 643 { 644 PetscErrorCode ierr; 645 PetscInt flops=0; 646 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 647 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 648 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 649 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 650 PetscInt *ci=c->i,*cj=c->j,*cjj; 651 PetscInt am=A->M,cn=C->N,cm=C->M; 652 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 653 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 654 655 PetscFunctionBegin; 656 /* Allocate temporary array for storage of one row of A*P */ 657 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 658 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 659 660 apj = (PetscInt *)(apa + cn); 661 apjdense = apj + cn; 662 663 /* Clear old values in C */ 664 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 665 666 for (i=0;i<am;i++) { 667 /* Form sparse row of A*P */ 668 anzi = ai[i+1] - ai[i]; 669 apnzj = 0; 670 for (j=0;j<anzi;j++) { 671 prow = *aj++; 672 pnzj = pi[prow+1] - pi[prow]; 673 pjj = pj + pi[prow]; 674 paj = pa + pi[prow]; 675 for (k=0;k<pnzj;k++) { 676 if (!apjdense[pjj[k]]) { 677 apjdense[pjj[k]] = -1; 678 apj[apnzj++] = pjj[k]; 679 } 680 apa[pjj[k]] += (*aa)*paj[k]; 681 } 682 flops += 2*pnzj; 683 aa++; 684 } 685 686 /* Sort the j index array for quick sparse axpy. */ 687 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 688 689 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 690 pnzi = pi[i+1] - pi[i]; 691 for (j=0;j<pnzi;j++) { 692 nextap = 0; 693 crow = *pJ++; 694 cjj = cj + ci[crow]; 695 caj = ca + ci[crow]; 696 /* Perform sparse axpy operation. Note cjj includes apj. */ 697 for (k=0;nextap<apnzj;k++) { 698 if (cjj[k]==apj[nextap]) { 699 caj[k] += (*pA)*apa[apj[nextap++]]; 700 } 701 } 702 flops += 2*apnzj; 703 pA++; 704 } 705 706 /* Zero the current row info for A*P */ 707 for (j=0;j<apnzj;j++) { 708 apa[apj[j]] = 0.; 709 apjdense[apj[j]] = 0; 710 } 711 } 712 713 /* Assemble the final matrix and clean up */ 714 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 716 ierr = PetscFree(apa);CHKERRQ(ierr); 717 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 718 719 PetscFunctionReturn(0); 720 } 721 722 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 723 static PetscEvent logkey_matptapnumeric_local = 0; 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 726 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 727 { 728 PetscErrorCode ierr; 729 PetscInt flops=0; 730 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 731 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 732 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 733 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 734 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 735 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 736 PetscInt *pJ=pj_loc,*pjj; 737 PetscInt *ci=c->i,*cj=c->j,*cjj; 738 PetscInt am=A->m,cn=C->N,cm=C->M; 739 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 740 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 741 MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 742 743 PetscFunctionBegin; 744 if (!logkey_matptapnumeric_local) { 745 ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 746 } 747 ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 748 749 /* Allocate temporary array for storage of one row of A*P */ 750 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 751 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 752 apj = (PetscInt *)(apa + cn); 753 apjdense = apj + cn; 754 755 /* Clear old values in C */ 756 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 757 758 for (i=0;i<am;i++) { 759 /* Form i-th sparse row of A*P */ 760 apnzj = 0; 761 /* diagonal portion of A */ 762 anzi = adi[i+1] - adi[i]; 763 for (j=0;j<anzi;j++) { 764 prow = *adj; 765 adj++; 766 pnzj = pi_loc[prow+1] - pi_loc[prow]; 767 pjj = pj_loc + pi_loc[prow]; 768 paj = pa_loc + pi_loc[prow]; 769 for (k=0;k<pnzj;k++) { 770 if (!apjdense[pjj[k]]) { 771 apjdense[pjj[k]] = -1; 772 apj[apnzj++] = pjj[k]; 773 } 774 apa[pjj[k]] += (*ada)*paj[k]; 775 } 776 flops += 2*pnzj; 777 ada++; 778 } 779 /* off-diagonal portion of A */ 780 anzi = aoi[i+1] - aoi[i]; 781 for (j=0;j<anzi;j++) { 782 col = a->garray[*aoj]; 783 if (col < cstart){ 784 prow = *aoj; 785 } else if (col >= cend){ 786 prow = *aoj; 787 } else { 788 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 789 } 790 aoj++; 791 pnzj = pi_oth[prow+1] - pi_oth[prow]; 792 pjj = pj_oth + pi_oth[prow]; 793 paj = pa_oth + pi_oth[prow]; 794 for (k=0;k<pnzj;k++) { 795 if (!apjdense[pjj[k]]) { 796 apjdense[pjj[k]] = -1; 797 apj[apnzj++] = pjj[k]; 798 } 799 apa[pjj[k]] += (*aoa)*paj[k]; 800 } 801 flops += 2*pnzj; 802 aoa++; 803 } 804 /* Sort the j index array for quick sparse axpy. */ 805 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 806 807 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 808 pnzi = pi_loc[i+1] - pi_loc[i]; 809 for (j=0;j<pnzi;j++) { 810 nextap = 0; 811 crow = *pJ++; 812 cjj = cj + ci[crow]; 813 caj = ca + ci[crow]; 814 /* Perform sparse axpy operation. Note cjj includes apj. */ 815 for (k=0;nextap<apnzj;k++) { 816 if (cjj[k]==apj[nextap]) { 817 caj[k] += (*pA)*apa[apj[nextap++]]; 818 } 819 } 820 flops += 2*apnzj; 821 pA++; 822 } 823 824 /* Zero the current row info for A*P */ 825 for (j=0;j<apnzj;j++) { 826 apa[apj[j]] = 0.; 827 apjdense[apj[j]] = 0; 828 } 829 } 830 831 /* Assemble the final matrix and clean up */ 832 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 834 ierr = PetscFree(apa);CHKERRQ(ierr); 835 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 836 ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 837 PetscFunctionReturn(0); 838 } 839 static PetscEvent logkey_matptapsymbolic_local = 0; 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 842 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 843 { 844 PetscErrorCode ierr; 845 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 846 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 847 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 848 PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 849 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 850 PetscInt *ci,*cj,*ptaj; 851 PetscInt an=A->N,am=A->m,pN=P_loc->N; 852 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 853 PetscInt pm=P_loc->m,nlnk,*lnk; 854 MatScalar *ca; 855 PetscBT lnkbt; 856 PetscInt prend,nprow_loc,nprow_oth; 857 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 858 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 859 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 860 861 PetscFunctionBegin; 862 if (!logkey_matptapsymbolic_local) { 863 ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 864 } 865 ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 866 867 prend = prstart + pm; 868 869 /* get ij structure of P_loc^T */ 870 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 871 ptJ=ptj; 872 873 /* Allocate ci array, arrays for fill computation and */ 874 /* free space for accumulating nonzero column info */ 875 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 876 ci[0] = 0; 877 878 ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 879 ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 880 ptasparserow_loc = ptadenserow_loc + an; 881 ptadenserow_oth = ptasparserow_loc + an; 882 ptasparserow_oth = ptadenserow_oth + an; 883 884 /* create and initialize a linked list */ 885 nlnk = pN+1; 886 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 887 888 /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */ 889 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 890 nnz = adi[am] + aoi[am]; 891 ierr = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space); 892 current_space = free_space; 893 894 /* determine symbolic info for each row of C: */ 895 for (i=0;i<pN;i++) { 896 ptnzi = pti[i+1] - pti[i]; 897 nprow_loc = 0; nprow_oth = 0; 898 /* i-th row of symbolic P_loc^T*A_loc: */ 899 for (j=0;j<ptnzi;j++) { 900 arow = *ptJ++; 901 /* diagonal portion of A */ 902 anzj = adi[arow+1] - adi[arow]; 903 ajj = adj + adi[arow]; 904 for (k=0;k<anzj;k++) { 905 col = ajj[k]+prstart; 906 if (!ptadenserow_loc[col]) { 907 ptadenserow_loc[col] = -1; 908 ptasparserow_loc[nprow_loc++] = col; 909 } 910 } 911 /* off-diagonal portion of A */ 912 anzj = aoi[arow+1] - aoi[arow]; 913 ajj = aoj + aoi[arow]; 914 for (k=0;k<anzj;k++) { 915 col = a->garray[ajj[k]]; /* global col */ 916 if (col < cstart){ 917 col = ajj[k]; 918 } else if (col >= cend){ 919 col = ajj[k] + pm; 920 } else { 921 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 922 } 923 if (!ptadenserow_oth[col]) { 924 ptadenserow_oth[col] = -1; 925 ptasparserow_oth[nprow_oth++] = col; 926 } 927 } 928 } 929 930 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 931 cnzi = 0; 932 /* rows of P_loc */ 933 ptaj = ptasparserow_loc; 934 for (j=0; j<nprow_loc; j++) { 935 prow = *ptaj++; 936 prow -= prstart; /* rm */ 937 pnzj = pi_loc[prow+1] - pi_loc[prow]; 938 pjj = pj_loc + pi_loc[prow]; 939 /* add non-zero cols of P into the sorted linked list lnk */ 940 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 941 cnzi += nlnk; 942 } 943 /* rows of P_oth */ 944 ptaj = ptasparserow_oth; 945 for (j=0; j<nprow_oth; j++) { 946 prow = *ptaj++; 947 if (prow >= prend) prow -= pm; /* rm */ 948 pnzj = pi_oth[prow+1] - pi_oth[prow]; 949 pjj = pj_oth + pi_oth[prow]; 950 /* add non-zero cols of P into the sorted linked list lnk */ 951 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 952 cnzi += nlnk; 953 } 954 955 /* If free space is not available, make more free space */ 956 /* Double the amount of total space in the list */ 957 if (current_space->local_remaining<cnzi) { 958 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 959 } 960 961 /* Copy data into free space, and zero out denserows */ 962 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 963 current_space->array += cnzi; 964 current_space->local_used += cnzi; 965 current_space->local_remaining -= cnzi; 966 967 for (j=0;j<nprow_loc; j++) { 968 ptadenserow_loc[ptasparserow_loc[j]] = 0; 969 } 970 for (j=0;j<nprow_oth; j++) { 971 ptadenserow_oth[ptasparserow_oth[j]] = 0; 972 } 973 974 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 975 /* For now, we will recompute what is needed. */ 976 ci[i+1] = ci[i] + cnzi; 977 } 978 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 979 /* Allocate space for cj, initialize cj, and */ 980 /* destroy list of free space and other temporary array(s) */ 981 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 982 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 983 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 984 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 985 986 /* Allocate space for ca */ 987 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 988 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 989 990 /* put together the new matrix */ 991 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 992 993 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 994 /* Since these are PETSc arrays, change flags to free them as necessary. */ 995 c = (Mat_SeqAIJ *)((*C)->data); 996 c->freedata = PETSC_TRUE; 997 c->nonew = 0; 998 999 /* Clean up. */ 1000 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1001 1002 ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005