1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 18 { 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 CHKERRQ(PetscMalloc1(nz + w + 1,&fill)); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 52 { 53 PetscInt nz; 54 55 PetscFunctionBegin; 56 if (!dfillsparse) PetscFunctionReturn(0); 57 58 /* Determine number of non-zeros */ 59 nz = (dfillsparse[w] - w - 1); 60 61 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 62 CHKERRQ(PetscMalloc1(nz + w + 1,rfill)); 63 CHKERRQ(PetscArraycpy(*rfill,dfillsparse,nz+w+1)); 64 PetscFunctionReturn(0); 65 } 66 67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 68 { 69 PetscInt i,k,cnt = 1; 70 71 PetscFunctionBegin; 72 73 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 74 columns to the left with any nonzeros in them plus 1 */ 75 CHKERRQ(PetscCalloc1(dd->w,&dd->ofillcols)); 76 for (i=0; i<dd->w; i++) { 77 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 78 } 79 for (i=0; i<dd->w; i++) { 80 if (dd->ofillcols[i]) { 81 dd->ofillcols[i] = cnt++; 82 } 83 } 84 PetscFunctionReturn(0); 85 } 86 87 /*@ 88 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 89 of the matrix returned by DMCreateMatrix(). 90 91 Logically Collective on da 92 93 Input Parameters: 94 + da - the distributed array 95 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 96 - ofill - the fill pattern in the off-diagonal blocks 97 98 Level: developer 99 100 Notes: 101 This only makes sense when you are doing multicomponent problems but using the 102 MPIAIJ matrix format 103 104 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 105 representing coupling and 0 entries for missing coupling. For example 106 $ dfill[9] = {1, 0, 0, 107 $ 1, 1, 0, 108 $ 0, 1, 1} 109 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 110 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 111 diagonal block). 

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (converted to compressed form) */
  CHKERRQ(DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill));
  CHKERRQ(DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  CHKERRQ(DMDASetBlockFills_Private2(dd));

  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

       The format for dfill and ofill is a sparse representation of a
       dof-by-dof matrix with 1 entries representing coupling and 0 entries
       for missing coupling.  The sparse representation is a 1 dimensional
       array of length nz + dof + 1, where nz is the number of non-zeros in
       the matrix.  The first dof entries in the array give the
       starting array indices of each row's items in the rest of the array,
       the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
       and the remaining nz items give the column indices of each of
       the 1s within the logical 2D matrix.  Each row's items within
       the array are the column indices of the 1s within that row
       of the 2D matrix.  PETSc developers may recognize that this is the
       same format as that computed by the DMDASetBlockFills_Private()
       function from a dense 2D matrix representation.

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Philip C. Roth

.seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save (copy) the given sparse dfill and ofill information */
  CHKERRQ(DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill));
  CHKERRQ(DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  CHKERRQ(DMDASetBlockFills_Private2(dd));

  PetscFunctionReturn(0);
}

/* Dispatches to the dimension-specific coloring routine for this DMDA */
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                let's see the domain and its local portion

                      m
     ------------------------------------------------------
    |                                                      |
    |                                                      |
    |               ----------------------                 |
    |               |                    |                 |
 n  |           yn  |                    |                 |
    |               |                    |                 |
    |               .---------------------                 |
    |             (xs,ys)     xn                           |
    |            .
    |         (gxs,gys)                                    |
    |                                                      |
     -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL));

  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
  CHKERRMPI(MPI_Comm_size(comm,&size));
  if (ctype == IS_COLORING_LOCAL) {
    /* on one process a local coloring is the same as a global one */
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction of extent one wraps onto itself within a single process */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements; restored below */
  CHKERRQ(PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ));
  if (!isBAIJ) CHKERRQ(PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ));
  if (!isBAIJ) CHKERRQ(PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) {
    CHKERRQ(DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 2) {
    CHKERRQ(DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 3) {
    CHKERRQ(DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring));
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* restore the per-point degrees of freedom and scaled corner indices */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* Coloring for a 2d DMDA: colors repeat with period col = 2*s + 1 in each direction,
   with nc components per point; results are cached on the DM_DA */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    CHKERRQ(DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        CHKERRQ(PetscMalloc1(nc*nx*ny,&colors));
        ii = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        ncolors = nc + nc*(col-1 + col*(col-1));
        CHKERRQ(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        CHKERRQ(PetscMalloc1(nc*gnx*gny,&colors));
        ii = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        CHKERRQ(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        CHKERRQ(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  /* bump the reference count since the cached coloring is handed to the caller */
  CHKERRQ(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* Coloring for a 3d DMDA: colors repeat with period col = 2*s + 1 in each of the
   three directions, with nc components per point; results are cached on the DM_DA */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      CHKERRQ(PetscMalloc1(nc*nx*ny*nz,&colors));
      ii = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      CHKERRQ(PetscMalloc1(nc*gnx*gny*gnz,&colors));
      ii = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      CHKERRQ(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* bump the reference count since the cached coloring is handed to the caller */
  CHKERRQ(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* Coloring for a 1d DMDA; when ofill information is present (DMDASetBlockFills())
   only the components with off-diagonal coupling get distinct off-block colors */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction
     for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      CHKERRQ(PetscMalloc1(nc*nx,&colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      CHKERRQ(PetscMalloc1(nc*gnx,&colors));
      i1 = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      CHKERRQ(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  CHKERRQ(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* 5-point (star stencil, width 1) special case in 2d needing only 5 colors per component.
   NOTE(review): unlike the other DMCreateColoring_DA_* routines this one does not call
   ISColoringReference(); its caller DMCreateColoring_DA_2d_MPIAIJ() takes the reference —
   confirm before calling this routine from anywhere else. */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL));
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      CHKERRQ(PetscMalloc1(nc*nx*ny,&colors));
      ii = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      CHKERRQ(PetscMalloc1(nc*gnx*gny,&colors));
      ii = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      CHKERRQ(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscFunctionReturn(0);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
extern
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

/*@C
   MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

   Logically Collective on mat

   Input Parameters:
+  mat - the matrix
-  da - the da

   Level: intermediate

@*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* dispatches to the matrix-type specific implementation, if any */
  CHKERRQ(PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da)));
  PetscFunctionReturn(0);
}

/* Views a DMDA-generated parallel matrix in the DMDA's natural ordering by
   permuting into a temporary matrix and viewing that */
PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer)
{
  DM                da;
  const char        *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart,rend,*petsc,i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  CHKERRQ(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* map the locally owned row range from application to PETSc ordering */
  CHKERRQ(DMDAGetAO(da,&ao));
  CHKERRQ(MatGetOwnershipRange(A,&rstart,&rend));
  CHKERRQ(PetscMalloc1(rend-rstart,&petsc));
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  CHKERRQ(AOApplicationToPetsc(ao,rend-rstart,petsc));
  CHKERRQ(ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is));

  /* call viewer on natural ordering */
  CHKERRQ(MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural));
  CHKERRQ(ISDestroy(&is));
  CHKERRQ(PetscObjectGetOptionsPrefix((PetscObject)A,&prefix));
  CHKERRQ(PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix));
  CHKERRQ(PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  CHKERRQ(MatView(Anatural,viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  CHKERRQ(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/* Loads a DMDA-generated parallel matrix stored in the natural ordering and
   permutes it back into the DMDA (application) ordering, replacing A's data */
PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM       da;
  Mat      Anatural,Aapp;
  AO       ao;
  PetscInt rstart,rend,*app,i,m,n,M,N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&Anatural));
  CHKERRQ(MatSetType(Anatural,((PetscObject)A)->type_name));
  CHKERRQ(MatGetSize(A,&M,&N));
  CHKERRQ(MatGetLocalSize(A,&m,&n));
  CHKERRQ(MatSetSizes(Anatural,m,n,M,N));
  CHKERRQ(MatLoad(Anatural,viewer));

  /* Map natural ordering to application ordering and create IS */
  CHKERRQ(DMDAGetAO(da,&ao));
  CHKERRQ(MatGetOwnershipRange(Anatural,&rstart,&rend));
  CHKERRQ(PetscMalloc1(rend-rstart,&app));
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  CHKERRQ(AOPetscToApplication(ao,rend-rstart,app));
  CHKERRQ(ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is));

  /* Do permutation and replace header */
  CHKERRQ(MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp));
  CHKERRQ(MatHeaderReplace(A,&Aapp));
  CHKERRQ(ISDestroy(&is));
  CHKERRQ(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/* Creates (and preallocates) the matrix associated with a DMDA, dispatching on
   dimension and on which preallocation interface the chosen matrix type supports */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  void        (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA       *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  CHKERRQ(MatInitializePackage());
  mtype = da->mattype;

  /*
                      m
     ------------------------------------------------------
    |                                                      |
    |                                                      |
    |               ----------------------                 |
    |               |                    |                 |
 n  |           ny  |                    |                 |
    |               |                    |                 |
    |               .---------------------                 |
    |             (xs,ys)     nx                           |
    |            .
    |         (gxs,gys)                                    |
    |                                                      |
     -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* CHKERRQ(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  CHKERRQ(DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
  CHKERRQ(MatCreate(comm,&A));
  CHKERRQ(MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P));
  CHKERRQ(MatSetType(A,mtype));
  CHKERRQ(MatSetFromOptions(A));
  /* keep small local problems on the CPU */
  if (dof*nx*ny*nz < da->bind_below) {
    CHKERRQ(MatSetBindingPropagates(A,PETSC_TRUE));
    CHKERRQ(MatBindToCPU(A,PETSC_TRUE));
  }
  CHKERRQ(MatSetDM(A,da));
  if (da->structure_only) {
    CHKERRQ(MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE));
  }
  CHKERRQ(MatGetType(A,&Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being more
   low-level than matrices. This is kind of cheating because sometimes
   we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij));
  if (!aij) {
    CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij));
  }
  if (!aij) {
    CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij));
    if (!baij) {
      CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij));
    }
    if (!baij) {
      CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij));
      if (!sbaij) {
        CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij));
      }
      if (!sbaij) {
        CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell));
        if (!sell) {
          CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell));
        }
      }
      if (!sell) {
        CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        CHKERRQ(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        CHKERRQ(DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL));
        CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          CHKERRQ(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A));
        } else {
          CHKERRQ(DMCreateMatrix_DA_1d_MPIAIJ(da,A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        CHKERRQ(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A));
      } else {
        CHKERRQ(DMCreateMatrix_DA_2d_MPIAIJ(da,A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        CHKERRQ(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A));
      } else {
        CHKERRQ(DMCreateMatrix_DA_3d_MPIAIJ(da,A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      CHKERRQ(DMCreateMatrix_DA_2d_MPIBAIJ(da,A));
    } else if (dim == 3) {
      CHKERRQ(DMCreateMatrix_DA_3d_MPIBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      CHKERRQ(DMCreateMatrix_DA_2d_MPISBAIJ(da,A));
    } else if (dim == 3) {
      CHKERRQ(DMCreateMatrix_DA_3d_MPISBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
    if (dim == 2) {
      CHKERRQ(DMCreateMatrix_DA_2d_MPISELL(da,A));
    } else if (dim == 3) {
      CHKERRQ(DMCreateMatrix_DA_3d_MPISELL(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    CHKERRQ(DMCreateMatrix_DA_IS(da,A));
  } else {
    /* matrix type has no recognized preallocation interface; just set it up */
    ISLocalToGlobalMapping ltog;

    CHKERRQ(MatSetBlockSize(A,dof));
    CHKERRQ(MatSetUp(A));
    CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
    CHKERRQ(MatSetLocalToGlobalMapping(A,ltog,ltog));
  }
  CHKERRQ(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]));
  CHKERRQ(MatSetStencil(A,dim,dims,starts,dof));
  CHKERRQ(MatSetDM(A,da));
  CHKERRMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    CHKERRQ(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    CHKERRQ(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
/* Creates/preallocates a MATIS matrix for a DMDA using the element decomposition
   from DMDAGetElements() and a MATPREALLOCATOR helper matrix */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ,P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt         *e_loc,*idx;
  PetscInt               i,nel,nen,nv,dof,*gidx,n,N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  CHKERRQ(MatSetBlockSize(J,dof));
  CHKERRQ(DMGetLocalToGlobalMapping(dm,&ltog));

  /* flag local elements indices in local DMDA numbering */
  CHKERRQ(ISLocalToGlobalMappingGetSize(ltog,&nv));
  CHKERRQ(PetscBTCreate(nv/dof,&bt));
  CHKERRQ(DMDAGetElements(dm,&nel,&nen,&e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i=0;i<nel*nen;i++) CHKERRQ(PetscBTSet(bt,e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  CHKERRQ(PetscMalloc1(nv/dof,&gidx));
  CHKERRQ(ISLocalToGlobalMappingGetBlockIndices(ltog,&idx));
  CHKERRQ(PetscArraycpy(gidx,idx,nv/dof));
  CHKERRQ(ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx));
  for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
  CHKERRQ(PetscBTDestroy(&bt));
  CHKERRQ(ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is));
  CHKERRQ(ISLocalToGlobalMappingCreateIS(is,&ltog));
  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
  CHKERRQ(ISLocalToGlobalMappingDestroy(&ltog));
  CHKERRQ(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    CHKERRQ(MatSetUp(J));
  } else {
    /* run the element loop once through a MATPREALLOCATOR clone of the local matrix */
    CHKERRQ(MatISGetLocalMat(J,&lJ));
    CHKERRQ(MatGetLocalToGlobalMapping(lJ,&ltog,NULL));
    CHKERRQ(MatCreate(PetscObjectComm((PetscObject)lJ),&P));
    CHKERRQ(MatSetType(P,MATPREALLOCATOR));
    CHKERRQ(MatSetLocalToGlobalMapping(P,ltog,ltog));
    CHKERRQ(MatGetSize(lJ,&N,NULL));
    CHKERRQ(MatGetLocalSize(lJ,&n,NULL));
    CHKERRQ(MatSetSizes(P,n,n,N,N));
    CHKERRQ(MatSetBlockSize(P,dof));
    CHKERRQ(MatSetUp(P));
    for (i=0;i<nel;i++) {
      CHKERRQ(MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES));
    }
    CHKERRQ(MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ));
    CHKERRQ(MatISRestoreLocalMat(J,&lJ));
    CHKERRQ(DMDARestoreElements(dm,&nel,&nen,&e_loc));
    CHKERRQ(MatDestroy(&P));

    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}

/* Preallocates (and optionally fills with zeros) a 2d SELL matrix for a DMDA */
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  CHKERRQ(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {

    /* stencil extent in x, clipped at physical boundaries unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,m-i-1)); 910 911 for (j=ys; j<ys+ny; j++) { 912 slot = i - gxs + gnx*(j - gys); 913 914 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 915 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 916 917 cnt = 0; 918 for (k=0; k<nc; k++) { 919 for (l=lstart; l<lend+1; l++) { 920 for (p=pstart; p<pend+1; p++) { 921 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 922 cols[cnt++] = k + nc*(slot + gnx*l + p); 923 } 924 } 925 } 926 rows[k] = k + nc*(slot); 927 } 928 CHKERRQ(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz)); 929 } 930 } 931 CHKERRQ(MatSetBlockSize(J,nc)); 932 CHKERRQ(MatSeqSELLSetPreallocation(J,0,dnz)); 933 CHKERRQ(MatMPISELLSetPreallocation(J,0,dnz,0,onz)); 934 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 935 936 CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog)); 937 938 /* 939 For each node in the grid: we get the neighbors in the local (on processor ordering 940 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 941 PETSc ordering. 942 */ 943 if (!da->prealloc_only) { 944 CHKERRQ(PetscCalloc1(col*col*nc*nc,&values)); 945 for (i=xs; i<xs+nx; i++) { 946 947 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 948 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 949 950 for (j=ys; j<ys+ny; j++) { 951 slot = i - gxs + gnx*(j - gys); 952 953 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 954 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        /* values was calloc'ed: insert explicit zeros in all potential nonzero locations */
        CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPISELL - 3d analogue of DMCreateMatrix_DA_2d_MPISELL:
   preallocates a (Seq/MPI)SELL matrix for a 3d DMDA (box or star stencil) and,
   unless da->prealloc_only is set, inserts explicit zeros in every potential
   nonzero location.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  CHKERRQ(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr =
MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* {i,j,k}start/end clip the stencil at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        /* slot is the ghosted-local (node) index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        CHKERRQ(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  CHKERRQ(MatSetBlockSize(J,nc));
  CHKERRQ(MatSeqSELLSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): col^3*nc^2 entries would suffice here; nc^3 over-allocates when nc > 1 */
    CHKERRQ(PetscCalloc1(col*col*col*nc*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ?
-s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (l=0; l<nc; l++) {
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
            rows[l] = l + nc*(slot);
          }
          CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ - Preallocates a (Seq/MPI)AIJ matrix for a 2d DMDA
   (box or star stencil, dense dof coupling) and, unless da->prealloc_only is set,
   inserts the nonzero pattern (NULL values) so later insertions cannot generate
   new nonzeros.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  if (bx ==
DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
    CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  CHKERRQ(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* pstart/pend clip the x-stencil at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        CHKERRQ(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      } else {
        CHKERRQ(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  CHKERRQ(MatSetBlockSize(J,nc));
  CHKERRQ(MatSeqAIJSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  /* only install the DMDA map if the matrix does not already carry one */
  CHKERRQ(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* one contiguous run of nc column indices per neighboring node */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBoundToCPU(J,&alreadyboundtocpu));
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
  }
  CHKERRQ(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors
   the per-block fill patterns dd->dfill (diagonal block) and dd->ofill
   (off-diagonal blocks) set by DMDASetBlockFills(), in the modified-CSR layout
   built by DMDASetBlockFills_Private().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the
code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  CHKERRQ(PetscMalloc1(col*col*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {

    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,n-j-1));

      for (k=0; k<nc; k++) {
        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if (l || p) {
              /* neighboring node: couple only the components listed in ofill for row-component k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
              }
            } else {
              /* same node: use dfill if given, otherwise dense dof coupling */
              if (dfill) {
                for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
              } else {
                for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
              }
            }
          }
        }
        row    = k + nc*(slot);
        maxcnt = PetscMax(maxcnt,cnt);
        if (removedups) {
          CHKERRQ(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        } else {
          CHKERRQ(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  CHKERRQ(MatSeqAIJSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,n-j-1));

        for (k=0; k<nc; k++) {
          cnt = 0;
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
                } else {
                  for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
                }
              }
            }
          }
          row = k + nc*(slot);
          CHKERRQ(MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_3d_MPIAIJ - Preallocates a (Seq/MPI)AIJ matrix for a 3d DMDA
   (box or star stencil, dense dof coupling) and, unless da->prealloc_only is set,
   inserts the nonzero pattern (NULL values) so later insertions cannot generate
   new nonzeros.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  CHKERRQ(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* {i,j,k}start/end clip the stencil at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          CHKERRQ(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        } else {
          CHKERRQ(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  CHKERRQ(MatSetBlockSize(J,nc));
  CHKERRQ(MatSeqAIJSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  /* only install the DMDA map if the matrix does not already carry one */
  CHKERRQ(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          /* kk/jj/ii loop order here produces columns sorted in the matrix ordering */
          cnt = 0;
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  for (l=1; l<nc; l++) {
                    cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates a (Seq/MPI)AIJ matrix for a
   1d DMDA honoring the dd->dfill/dd->ofill block fill patterns; supports only
   stencil width s <= 1 (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank));
  CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));

  /*
         nc - number of components per grid point

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  PetscCheckFalse(s > 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* cols/ocols hold per-row diagonal/off-diagonal nonzero counts during this phase */
  CHKERRQ(PetscCalloc2(nx*nc,&cols,nx*nc,&ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      /* NOTE(review): dfill is dereferenced here without a NULL check, although the
         insertion loops below guard with "if (dfill)" — confirm callers always set dfill */
      cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full left+right stencil lies on this process */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ?
0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  CHKERRQ(MatSeqAIJSetPreallocation(J,0,cols));
  CHKERRQ(MatMPIAIJSetPreallocation(J,0,cols,0,ocols));
  CHKERRQ(PetscFree2(cols,ocols));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
    (NOTE(review): this insertion phase actually calls MatSetValues() with global
    indices, not MatSetValuesLocal() — the comment above appears stale.)
  */
  if (!da->prealloc_only) {
    /* cols is reused here to hold global column indices of one row at a time */
    CHKERRQ(PetscMalloc1(maxcnt,&cols));
    row = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap the left neighbors around the periodic boundary */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        CHKERRQ(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] =
(i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        CHKERRQ(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap the right neighbors around the periodic boundary (valid for s <= 1) */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        CHKERRQ(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    CHKERRQ(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ - Preallocates a (Seq/MPI)AIJ matrix for a 1d DMDA
   with dense dof coupling, using the simple uniform estimate col*nc nonzeros per
   row (col*nc diagonal and col*nc off-diagonal in parallel), then optionally
   inserts the nonzero pattern.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
{
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  if (bx == DM_BOUNDARY_NONE) {
    CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col = 2*s + 1;

  CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  CHKERRQ(MatSetBlockSize(J,nc));
  CHKERRQ(MatSeqAIJSetPreallocation(J,col*nc,NULL));
  CHKERRQ(MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
  /* only install the DMDA map if the matrix does not already carry one */
  CHKERRQ(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    CHKERRQ(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the ghosted region available on this process */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) {
      CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    CHKERRQ(PetscFree2(rows,cols));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Sequential-only variant of
   DMCreateMatrix_DA_1d_MPIAIJ that uses MatSeqAIJSetTotalPreallocation() with a
   single total nonzero estimate instead of per-row counts.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
{
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col = 2*s + 1;

  CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  CHKERRQ(MatSetBlockSize(J,nc));
  CHKERRQ(MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
  /* only install the DMDA map if the matrix does not already carry one */
  CHKERRQ(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    CHKERRQ(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the ghosted region available on this process */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) {
      CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    CHKERRQ(PetscFree2(rows,cols));
  }
  CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIBAIJ - Preallocates a (Seq/MPI)BAIJ matrix (block size
   nc) for a 2d DMDA; preallocation is computed per block row, and explicit zero
   blocks are inserted unless da->prealloc_only is set.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid
point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;

  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  CHKERRQ(PetscMalloc1(col*col*nc*nc,&cols));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  /* counts are per block row: nx*ny block rows, one per grid node */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      slot = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      CHKERRQ(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
    }
  }
  CHKERRQ(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  CHKERRQ(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values was calloc'ed: insert explicit zero blocks in all potential nonzero blocks */
    CHKERRQ(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot = i - gxs + gnx*(j - gys);
        cnt  = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        CHKERRQ(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* 3d BAIJ analogue of the routine above; its body continues past this chunk of the file */
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;

CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 1912 CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 1913 CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm)); 1914 1915 CHKERRQ(PetscMalloc1(col*col*col,&cols)); 1916 1917 CHKERRQ(DMGetLocalToGlobalMapping(da,<og)); 1918 1919 /* determine the matrix preallocation information */ 1920 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1921 for (i=xs; i<xs+nx; i++) { 1922 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1923 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1924 for (j=ys; j<ys+ny; j++) { 1925 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1926 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1927 for (k=zs; k<zs+nz; k++) { 1928 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1929 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1930 1931 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1932 1933 /* Find block columns in block row */ 1934 cnt = 0; 1935 for (ii=istart; ii<iend+1; ii++) { 1936 for (jj=jstart; jj<jend+1; jj++) { 1937 for (kk=kstart; kk<kend+1; kk++) { 1938 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1939 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1940 } 1941 } 1942 } 1943 } 1944 CHKERRQ(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz)); 1945 } 1946 } 1947 } 1948 CHKERRQ(MatSeqBAIJSetPreallocation(J,nc,0,dnz)); 1949 CHKERRQ(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 1950 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1951 1952 CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1953 1954 /* 1955 For each node in the grid: we get the neighbors in the local (on processor ordering 1956 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1957 PETSc ordering. 
1958 */ 1959 if (!da->prealloc_only) { 1960 CHKERRQ(PetscCalloc1(col*col*col*nc*nc,&values)); 1961 for (i=xs; i<xs+nx; i++) { 1962 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1963 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1964 for (j=ys; j<ys+ny; j++) { 1965 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1966 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1967 for (k=zs; k<zs+nz; k++) { 1968 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1969 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1970 1971 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1972 1973 cnt = 0; 1974 for (ii=istart; ii<iend+1; ii++) { 1975 for (jj=jstart; jj<jend+1; jj++) { 1976 for (kk=kstart; kk<kend+1; kk++) { 1977 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1978 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1979 } 1980 } 1981 } 1982 } 1983 CHKERRQ(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 1984 } 1985 } 1986 } 1987 CHKERRQ(PetscFree(values)); 1988 /* do not copy values to GPU since they are all zero and not yet needed there */ 1989 CHKERRQ(MatBindToCPU(J,PETSC_TRUE)); 1990 CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1991 CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1992 CHKERRQ(MatBindToCPU(J,PETSC_FALSE)); 1993 CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1994 } 1995 CHKERRQ(PetscFree(cols)); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 /* 2000 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2001 identify in the local ordering with periodic domain. 
2002 */ 2003 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2004 { 2005 PetscInt i,n; 2006 2007 PetscFunctionBegin; 2008 CHKERRQ(ISLocalToGlobalMappingApplyBlock(ltog,1,row,row)); 2009 CHKERRQ(ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col)); 2010 for (i=0,n=0; i<*cnt; i++) { 2011 if (col[i] >= *row) col[n++] = col[i]; 2012 } 2013 *cnt = n; 2014 PetscFunctionReturn(0); 2015 } 2016 2017 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2018 { 2019 PetscErrorCode ierr; 2020 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2021 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2022 PetscInt istart,iend,jstart,jend,ii,jj; 2023 MPI_Comm comm; 2024 PetscScalar *values; 2025 DMBoundaryType bx,by; 2026 DMDAStencilType st; 2027 ISLocalToGlobalMapping ltog; 2028 2029 PetscFunctionBegin; 2030 /* 2031 nc - number of components per grid point 2032 col - number of colors needed in one direction for single component problem 2033 */ 2034 CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st)); 2035 col = 2*s + 1; 2036 2037 CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL)); 2038 CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL)); 2039 CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm)); 2040 2041 CHKERRQ(PetscMalloc1(col*col*nc*nc,&cols)); 2042 2043 CHKERRQ(DMGetLocalToGlobalMapping(da,<og)); 2044 2045 /* determine the matrix preallocation information */ 2046 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2047 for (i=xs; i<xs+nx; i++) { 2048 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2049 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2050 for (j=ys; j<ys+ny; j++) { 2051 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2052 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2053 slot = i - gxs + gnx*(j - gys); 2054 2055 /* Find block columns in block row */ 2056 cnt = 0; 2057 for (ii=istart; ii<iend+1; ii++) { 2058 for (jj=jstart; jj<jend+1; jj++) { 2059 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2060 cols[cnt++] = slot + ii + gnx*jj; 2061 } 2062 } 2063 } 2064 CHKERRQ(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2065 CHKERRQ(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz)); 2066 } 2067 } 2068 CHKERRQ(MatSeqSBAIJSetPreallocation(J,nc,0,dnz)); 2069 CHKERRQ(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 2070 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2071 2072 CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog)); 2073 2074 /* 2075 For each node in the grid: we get the neighbors in the local (on processor ordering 2076 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2077 PETSc ordering. 2078 */ 2079 if (!da->prealloc_only) { 2080 CHKERRQ(PetscCalloc1(col*col*nc*nc,&values)); 2081 for (i=xs; i<xs+nx; i++) { 2082 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2083 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2084 for (j=ys; j<ys+ny; j++) { 2085 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2086 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2087 slot = i - gxs + gnx*(j - gys); 2088 2089 /* Find block columns in block row */ 2090 cnt = 0; 2091 for (ii=istart; ii<iend+1; ii++) { 2092 for (jj=jstart; jj<jend+1; jj++) { 2093 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2094 cols[cnt++] = slot + ii + gnx*jj; 2095 } 2096 } 2097 } 2098 CHKERRQ(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2099 CHKERRQ(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 2100 } 2101 } 2102 CHKERRQ(PetscFree(values)); 2103 /* do not copy values to GPU since they are all zero and not yet needed there */ 2104 CHKERRQ(MatBindToCPU(J,PETSC_TRUE)); 2105 CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 2106 CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 2107 CHKERRQ(MatBindToCPU(J,PETSC_FALSE)); 2108 CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2109 } 2110 CHKERRQ(PetscFree(cols)); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2115 { 2116 PetscErrorCode ierr; 2117 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2118 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2119 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2120 MPI_Comm comm; 2121 PetscScalar *values; 2122 DMBoundaryType bx,by,bz; 2123 DMDAStencilType st; 2124 ISLocalToGlobalMapping ltog; 2125 2126 PetscFunctionBegin; 2127 /* 2128 nc - number of components per grid point 2129 col - number of colors needed in one direction for single component problem 2130 */ 2131 CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st)); 2132 col = 2*s + 1; 2133 2134 CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 2135 CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 2136 CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm)); 2137 2138 /* create the matrix */ 2139 CHKERRQ(PetscMalloc1(col*col*col,&cols)); 2140 2141 CHKERRQ(DMGetLocalToGlobalMapping(da,<og)); 2142 2143 /* determine the matrix preallocation information 
*/ 2144 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2145 for (i=xs; i<xs+nx; i++) { 2146 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2147 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2148 for (j=ys; j<ys+ny; j++) { 2149 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2150 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2151 for (k=zs; k<zs+nz; k++) { 2152 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2153 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2154 2155 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2156 2157 /* Find block columns in block row */ 2158 cnt = 0; 2159 for (ii=istart; ii<iend+1; ii++) { 2160 for (jj=jstart; jj<jend+1; jj++) { 2161 for (kk=kstart; kk<kend+1; kk++) { 2162 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2163 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2164 } 2165 } 2166 } 2167 } 2168 CHKERRQ(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2169 CHKERRQ(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz)); 2170 } 2171 } 2172 } 2173 CHKERRQ(MatSeqSBAIJSetPreallocation(J,nc,0,dnz)); 2174 CHKERRQ(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 2175 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2176 2177 CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog)); 2178 2179 /* 2180 For each node in the grid: we get the neighbors in the local (on processor ordering 2181 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2182 PETSc ordering. 2183 */ 2184 if (!da->prealloc_only) { 2185 CHKERRQ(PetscCalloc1(col*col*col*nc*nc,&values)); 2186 for (i=xs; i<xs+nx; i++) { 2187 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2188 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2189 for (j=ys; j<ys+ny; j++) { 2190 jstart = (by == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s,-j)); 2191 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2192 for (k=zs; k<zs+nz; k++) { 2193 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2194 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2195 2196 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2197 2198 cnt = 0; 2199 for (ii=istart; ii<iend+1; ii++) { 2200 for (jj=jstart; jj<jend+1; jj++) { 2201 for (kk=kstart; kk<kend+1; kk++) { 2202 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2203 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2204 } 2205 } 2206 } 2207 } 2208 CHKERRQ(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2209 CHKERRQ(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 2210 } 2211 } 2212 } 2213 CHKERRQ(PetscFree(values)); 2214 /* do not copy values to GPU since they are all zero and not yet needed there */ 2215 CHKERRQ(MatBindToCPU(J,PETSC_TRUE)); 2216 CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 2217 CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 2218 CHKERRQ(MatBindToCPU(J,PETSC_FALSE)); 2219 CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2220 } 2221 CHKERRQ(PetscFree(cols)); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 /* ---------------------------------------------------------------------------------*/ 2226 2227 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2228 { 2229 PetscErrorCode ierr; 2230 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2231 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2232 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2233 DM_DA *dd = (DM_DA*)da->data; 2234 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2235 MPI_Comm comm; 2236 PetscScalar *values; 2237 DMBoundaryType bx,by,bz; 2238 ISLocalToGlobalMapping ltog; 2239 DMDAStencilType st; 2240 PetscBool removedups = PETSC_FALSE; 2241 2242 PetscFunctionBegin; 2243 /* 2244 nc - number of components per grid 
point 2245 col - number of colors needed in one direction for single component problem 2246 2247 */ 2248 CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st)); 2249 col = 2*s + 1; 2250 PetscCheckFalse(bx == DM_BOUNDARY_PERIODIC && (m % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2251 by 2*stencil_width + 1\n"); 2252 PetscCheckFalse(by == DM_BOUNDARY_PERIODIC && (n % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2253 by 2*stencil_width + 1\n"); 2254 PetscCheckFalse(bz == DM_BOUNDARY_PERIODIC && (p % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2255 by 2*stencil_width + 1\n"); 2256 2257 /* 2258 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2259 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2260 */ 2261 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2262 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2263 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2264 2265 CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 2266 CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 2267 CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm)); 2268 2269 CHKERRQ(PetscMalloc1(col*col*col*nc,&cols)); 2270 CHKERRQ(DMGetLocalToGlobalMapping(da,<og)); 2271 2272 /* determine the matrix preallocation information */ 2273 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2274 2275 CHKERRQ(MatSetBlockSize(J,nc)); 2276 for (i=xs; i<xs+nx; i++) { 2277 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2278 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2279 for (j=ys; j<ys+ny; j++) { 2280 jstart = (by == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s,-j)); 2281 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2282 for (k=zs; k<zs+nz; k++) { 2283 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2284 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2285 2286 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2287 2288 for (l=0; l<nc; l++) { 2289 cnt = 0; 2290 for (ii=istart; ii<iend+1; ii++) { 2291 for (jj=jstart; jj<jend+1; jj++) { 2292 for (kk=kstart; kk<kend+1; kk++) { 2293 if (ii || jj || kk) { 2294 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2295 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2296 } 2297 } else { 2298 if (dfill) { 2299 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2300 } else { 2301 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2302 } 2303 } 2304 } 2305 } 2306 } 2307 row = l + nc*(slot); 2308 maxcnt = PetscMax(maxcnt,cnt); 2309 if (removedups) { 2310 CHKERRQ(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz)); 2311 } else { 2312 CHKERRQ(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz)); 2313 } 2314 } 2315 } 2316 } 2317 } 2318 CHKERRQ(MatSeqAIJSetPreallocation(J,0,dnz)); 2319 CHKERRQ(MatMPIAIJSetPreallocation(J,0,dnz,0,onz)); 2320 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2321 CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog)); 2322 2323 /* 2324 For each node in the grid: we get the neighbors in the local (on processor ordering 2325 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2326 PETSc ordering. 2327 */ 2328 if (!da->prealloc_only) { 2329 CHKERRQ(PetscCalloc1(maxcnt,&values)); 2330 for (i=xs; i<xs+nx; i++) { 2331 istart = (bx == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s,-i)); 2332 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2333 for (j=ys; j<ys+ny; j++) { 2334 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2335 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2336 for (k=zs; k<zs+nz; k++) { 2337 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2338 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2339 2340 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2341 2342 for (l=0; l<nc; l++) { 2343 cnt = 0; 2344 for (ii=istart; ii<iend+1; ii++) { 2345 for (jj=jstart; jj<jend+1; jj++) { 2346 for (kk=kstart; kk<kend+1; kk++) { 2347 if (ii || jj || kk) { 2348 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2349 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2350 } 2351 } else { 2352 if (dfill) { 2353 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2354 } else { 2355 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2356 } 2357 } 2358 } 2359 } 2360 } 2361 row = l + nc*(slot); 2362 CHKERRQ(MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES)); 2363 } 2364 } 2365 } 2366 } 2367 CHKERRQ(PetscFree(values)); 2368 /* do not copy values to GPU since they are all zero and not yet needed there */ 2369 CHKERRQ(MatBindToCPU(J,PETSC_TRUE)); 2370 CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 2371 CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 2372 CHKERRQ(MatBindToCPU(J,PETSC_FALSE)); 2373 CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2374 } 2375 CHKERRQ(PetscFree(cols)); 2376 PetscFunctionReturn(0); 2377 } 2378