1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 17 { 18 PetscErrorCode ierr; 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 53 { 54 PetscErrorCode ierr; 55 PetscInt nz; 56 57 PetscFunctionBegin; 58 if (!dfillsparse) PetscFunctionReturn(0); 59 60 /* Determine number of non-zeros */ 61 nz = (dfillsparse[w] - w - 1); 62 63 /* Allocate space for our copy of the given sparse matrix representation. */ 64 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 65 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 71 { 72 PetscErrorCode ierr; 73 PetscInt i,k,cnt = 1; 74 75 PetscFunctionBegin; 76 77 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 78 columns to the left with any nonzeros in them plus 1 */ 79 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 80 for (i=0; i<dd->w; i++) { 81 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 82 } 83 for (i=0; i<dd->w; i++) { 84 if (dd->ofillcols[i]) { 85 dd->ofillcols[i] = cnt++; 86 } 87 } 88 PetscFunctionReturn(0); 89 } 90 91 92 93 /*@ 94 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 95 of the matrix returned by DMCreateMatrix(). 96 97 Logically Collective on da 98 99 Input Parameter: 100 + da - the distributed array 101 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 102 - ofill - the fill pattern in the off-diagonal blocks 103 104 105 Level: developer 106 107 Notes: 108 This only makes sense when you are doing multicomponent problems but using the 109 MPIAIJ matrix format 110 111 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 112 representing coupling and 0 entries for missing coupling. For example 113 $ dfill[9] = {1, 0, 0, 114 $ 1, 1, 0, 115 $ 0, 1, 1} 116 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 117 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 118 diagonal block). 119 120 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 121 can be represented in the dfill, ofill format 122 123 Contributed by Glenn Hammond 124 125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 126 127 @*/ 128 PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill) 129 { 130 DM_DA *dd = (DM_DA*)da->data; 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 /* save the given dfill and ofill information */ 135 ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr); 136 ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr); 137 138 /* count nonzeros in ofill columns */ 139 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 140 141 PetscFunctionReturn(0); 142 } 143 144 145 /*@ 146 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 147 of the matrix returned by DMCreateMatrix(), using sparse representations 148 of fill patterns. 149 150 Logically Collective on da 151 152 Input Parameter: 153 + da - the distributed array 154 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 155 - ofill - the sparse fill pattern in the off-diagonal blocks 156 157 158 Level: developer 159 160 Notes: This only makes sense when you are doing multicomponent problems but using the 161 MPIAIJ matrix format 162 163 The format for dfill and ofill is a sparse representation of a 164 dof-by-dof matrix with 1 entries representing coupling and 0 entries 165 for missing coupling. The sparse representation is a 1 dimensional 166 array of length nz + dof + 1, where nz is the number of non-zeros in 167 the matrix. The first dof entries in the array give the 168 starting array indices of each row's items in the rest of the array, 169 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 170 and the remaining nz items give the column indices of each of 171 the 1s within the logical 2D matrix. Each row's items within 172 the array are the column indices of the 1s within that row 173 of the 2D matrix. PETSc developers may recognize that this is the 174 same format as that computed by the DMDASetBlockFills_Private() 175 function from a dense 2D matrix representation. 176 177 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 178 can be represented in the dfill, ofill format 179 180 Contributed by Philip C. Roth 181 182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 183 184 @*/ 185 PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse) 186 { 187 DM_DA *dd = (DM_DA*)da->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 /* save the given dfill and ofill information */ 192 ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr); 193 ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr); 194 195 /* count nonzeros in ofill columns */ 196 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 197 198 PetscFunctionReturn(0); 199 } 200 201 202 PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring) 203 { 204 PetscErrorCode ierr; 205 PetscInt dim,m,n,p,nc; 206 DMBoundaryType bx,by,bz; 207 MPI_Comm comm; 208 PetscMPIInt size; 209 PetscBool isBAIJ; 210 DM_DA *dd = (DM_DA*)da->data; 211 212 PetscFunctionBegin; 213 /* 214 m 215 ------------------------------------------------------ 216 | | 217 | | 218 | ---------------------- | 219 | | | | 220 n | yn | | | 221 | | | | 222 | .--------------------- | 223 | (xs,ys) xn | 224 | . | 225 | (gxs,gys) | 226 | | 227 ----------------------------------------------------- 228 */ 229 230 /* 231 nc - number of components per grid point 232 col - number of colors needed in one direction for single component problem 233 234 */ 235 ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr); 236 237 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 238 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 239 if (ctype == IS_COLORING_LOCAL) { 240 if (size == 1) { 241 ctype = IS_COLORING_GLOBAL; 242 } else if (dim > 1) { 243 if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) { 244 SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 245 } 246 } 247 } 248 249 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 250 matrices is for the blocks, not the individual matrix elements */ 251 ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr); 252 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);} 253 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);} 254 if (isBAIJ) { 255 dd->w = 1; 256 dd->xs = dd->xs/nc; 257 dd->xe = dd->xe/nc; 258 dd->Xs = dd->Xs/nc; 259 dd->Xe = dd->Xe/nc; 260 } 261 262 /* 263 We do not provide a getcoloring function in the DMDA operations because 264 the basic DMDA does not know about matrices. We think of DMDA as being 265 more low-level then matrices. 266 */ 267 if (dim == 1) { 268 ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 269 } else if (dim == 2) { 270 ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 271 } else if (dim == 3) { 272 ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 273 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); 274 if (isBAIJ) { 275 dd->w = nc; 276 dd->xs = dd->xs*nc; 277 dd->xe = dd->xe*nc; 278 dd->Xs = dd->Xs*nc; 279 dd->Xe = dd->Xe*nc; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 /* ---------------------------------------------------------------------------------*/ 285 286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 287 { 288 PetscErrorCode ierr; 289 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col; 290 PetscInt ncolors; 291 MPI_Comm comm; 292 DMBoundaryType bx,by; 293 DMDAStencilType st; 294 ISColoringValue *colors; 295 DM_DA *dd = (DM_DA*)da->data; 296 297 PetscFunctionBegin; 298 /* 299 nc - number of components per grid point 300 col - number of colors needed in one direction for single component problem 301 302 */ 303 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 304 col = 2*s + 1; 305 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 306 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 307 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 308 309 /* special case as taught to us by Paul Hovland */ 310 if (st == DMDA_STENCIL_STAR && s == 1) { 311 ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 312 } else { 313 if (ctype == IS_COLORING_GLOBAL) { 314 if (!dd->localcoloring) { 315 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 316 ii = 0; 317 for (j=ys; j<ys+ny; j++) { 318 for (i=xs; i<xs+nx; i++) { 319 for (k=0; k<nc; k++) { 320 colors[ii++] = k + nc*((i % col) + col*(j % col)); 321 } 322 } 323 } 324 ncolors = nc + nc*(col-1 + col*(col-1)); 325 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 326 } 327 *coloring = dd->localcoloring; 328 } else if (ctype == IS_COLORING_LOCAL) { 329 if (!dd->ghostedcoloring) { 330 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 331 ii = 0; 332 for (j=gys; j<gys+gny; j++) { 333 for (i=gxs; i<gxs+gnx; i++) { 334 for (k=0; k<nc; k++) { 335 /* the complicated stuff is to handle periodic boundaries */ 336 colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col)); 337 } 338 } 339 } 340 ncolors = nc + nc*(col - 1 + col*(col-1)); 341 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 342 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 343 344 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 345 } 346 *coloring = dd->ghostedcoloring; 347 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 348 } 349 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 /* ---------------------------------------------------------------------------------*/ 354 355 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 356 { 357 PetscErrorCode ierr; 358 PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P; 359 PetscInt ncolors; 360 MPI_Comm comm; 361 DMBoundaryType bx,by,bz; 362 DMDAStencilType st; 363 ISColoringValue *colors; 364 DM_DA *dd = (DM_DA*)da->data; 365 366 PetscFunctionBegin; 367 /* 368 nc - number of components per grid point 369 col - number of colors needed in one direction for single component problem 370 371 */ 372 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 373 col = 2*s + 1; 374 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 375 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 376 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 377 378 /* create the coloring */ 379 if (ctype == IS_COLORING_GLOBAL) { 380 if (!dd->localcoloring) { 381 ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr); 382 ii = 0; 383 for (k=zs; k<zs+nz; k++) { 384 for (j=ys; j<ys+ny; j++) { 385 for (i=xs; i<xs+nx; i++) { 386 for (l=0; l<nc; l++) { 387 colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col)); 388 } 389 } 390 } 391 } 392 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 393 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 394 } 395 *coloring = dd->localcoloring; 396 } else if (ctype == IS_COLORING_LOCAL) { 397 if (!dd->ghostedcoloring) { 398 ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr); 399 ii = 0; 400 for (k=gzs; k<gzs+gnz; k++) { 401 for (j=gys; j<gys+gny; j++) { 402 for (i=gxs; i<gxs+gnx; i++) { 403 for (l=0; l<nc; l++) { 404 /* the complicated stuff is to handle periodic boundaries */ 405 colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col)); 406 } 407 } 408 } 409 } 410 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 411 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 412 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 413 } 414 *coloring = dd->ghostedcoloring; 415 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 416 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* ---------------------------------------------------------------------------------*/ 421 422 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 423 { 424 PetscErrorCode ierr; 425 PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col; 426 PetscInt ncolors; 427 MPI_Comm comm; 428 DMBoundaryType bx; 429 ISColoringValue *colors; 430 DM_DA *dd = (DM_DA*)da->data; 431 432 PetscFunctionBegin; 433 /* 434 nc - number of components per grid point 435 col - number of colors needed in one direction for single component problem 436 437 */ 438 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 439 col = 2*s + 1; 440 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 441 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 442 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 443 444 /* create the coloring */ 445 if (ctype == IS_COLORING_GLOBAL) { 446 if (!dd->localcoloring) { 447 ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr); 448 if (dd->ofillcols) { 449 PetscInt tc = 0; 450 for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0); 451 i1 = 0; 452 for (i=xs; i<xs+nx; i++) { 453 for (l=0; l<nc; l++) { 454 if (dd->ofillcols[l] && (i % col)) { 455 colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; 456 } else { 457 colors[i1++] = l; 458 } 459 } 460 } 461 ncolors = nc + 2*s*tc; 462 } else { 463 i1 = 0; 464 for (i=xs; i<xs+nx; i++) { 465 for (l=0; l<nc; l++) { 466 colors[i1++] = l + nc*(i % col); 467 } 468 } 469 ncolors = nc + nc*(col-1); 470 } 471 ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 472 } 473 *coloring = dd->localcoloring; 474 } else if (ctype == IS_COLORING_LOCAL) { 475 if (!dd->ghostedcoloring) { 476 ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr); 477 i1 = 0; 478 for (i=gxs; i<gxs+gnx; i++) { 479 for (l=0; l<nc; l++) { 480 /* the complicated stuff is to handle periodic boundaries */ 481 colors[i1++] = l + nc*(SetInRange(i,m) % col); 482 } 483 } 484 ncolors = nc + nc*(col-1); 485 ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 486 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 487 } 488 *coloring = dd->ghostedcoloring; 489 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 490 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 494 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 495 { 496 PetscErrorCode ierr; 497 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc; 498 PetscInt ncolors; 499 MPI_Comm comm; 500 DMBoundaryType bx,by; 501 ISColoringValue *colors; 502 DM_DA *dd = (DM_DA*)da->data; 503 504 PetscFunctionBegin; 505 /* 506 nc - number of components per grid point 507 col - number of colors needed in one direction for single component problem 508 509 */ 510 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr); 511 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 512 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 513 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 514 /* create the coloring */ 515 if (ctype == IS_COLORING_GLOBAL) { 516 if (!dd->localcoloring) { 517 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 518 ii = 0; 519 for (j=ys; j<ys+ny; j++) { 520 for (i=xs; i<xs+nx; i++) { 521 for (k=0; k<nc; k++) { 522 colors[ii++] = k + nc*((3*j+i) % 5); 523 } 524 } 525 } 526 ncolors = 5*nc; 527 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 528 } 529 *coloring = dd->localcoloring; 530 } else if (ctype == IS_COLORING_LOCAL) { 531 if (!dd->ghostedcoloring) { 532 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 533 ii = 0; 534 for (j=gys; j<gys+gny; j++) { 535 for (i=gxs; i<gxs+gnx; i++) { 536 for (k=0; k<nc; k++) { 537 colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); 538 } 539 } 540 } 541 ncolors = 5*nc; 542 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 543 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 544 } 545 *coloring = dd->ghostedcoloring; 546 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 547 PetscFunctionReturn(0); 548 } 549 550 /* =========================================================================== */ 551 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool); 552 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 553 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool); 554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool); 555 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 556 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool); 557 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 558 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 559 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 560 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 561 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 562 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 563 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 564 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 565 566 /*@C 567 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 568 569 Logically Collective on mat 570 571 Input Parameters: 572 + mat - the matrix 573 - da - the da 574 575 Level: intermediate 576 577 @*/ 578 PetscErrorCode MatSetupDM(Mat mat,DM da) 579 { 580 PetscErrorCode ierr; 581 582 PetscFunctionBegin; 583 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 584 PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA); 585 ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 590 { 591 DM da; 592 PetscErrorCode ierr; 593 const char *prefix; 594 Mat Anatural; 595 AO ao; 596 PetscInt rstart,rend,*petsc,i; 597 IS is; 598 MPI_Comm comm; 599 PetscViewerFormat format; 600 601 PetscFunctionBegin; 602 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 603 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 604 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 605 606 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 607 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 608 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 609 610 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 611 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 612 ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr); 613 for (i=rstart; i<rend; i++) petsc[i-rstart] = i; 614 ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr); 615 ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 616 617 /* call viewer on natural ordering */ 618 ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr); 619 ierr = ISDestroy(&is);CHKERRQ(ierr); 620 ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr); 621 ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr); 622 ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr); 623 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 624 ierr = MatView(Anatural,viewer);CHKERRQ(ierr); 625 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 626 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 630 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 631 { 632 DM da; 633 PetscErrorCode ierr; 634 Mat Anatural,Aapp; 635 AO ao; 636 PetscInt rstart,rend,*app,i,m,n,M,N; 637 IS is; 638 MPI_Comm comm; 639 640 PetscFunctionBegin; 641 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 642 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 643 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 644 645 /* Load the matrix in natural ordering */ 646 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr); 647 ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr); 648 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 649 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 650 ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr); 651 ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr); 652 653 /* Map natural ordering to application ordering and create IS */ 654 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 655 ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr); 656 ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr); 657 for (i=rstart; i<rend; i++) app[i-rstart] = i; 658 ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr); 659 ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 660 661 /* Do permutation and replace header */ 662 ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr); 663 ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr); 664 ierr = ISDestroy(&is);CHKERRQ(ierr); 665 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 670 { 671 PetscErrorCode ierr; 672 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 673 Mat A; 674 MPI_Comm comm; 675 MatType Atype; 676 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 677 MatType mtype; 678 PetscMPIInt size; 679 DM_DA *dd = (DM_DA*)da->data; 680 681 PetscFunctionBegin; 682 ierr = MatInitializePackage();CHKERRQ(ierr); 683 mtype = da->mattype; 684 685 /* 686 m 687 ------------------------------------------------------ 688 | | 689 | | 690 | ---------------------- | 691 | | | | 692 n | ny | | | 693 | | | | 694 | .--------------------- | 695 | (xs,ys) nx | 696 | . | 697 | (gxs,gys) | 698 | | 699 ----------------------------------------------------- 700 */ 701 702 /* 703 nc - number of components per grid point 704 col - number of colors needed in one direction for single component problem 705 706 */ 707 M = dd->M; 708 N = dd->N; 709 P = dd->P; 710 dim = da->dim; 711 dof = dd->w; 712 /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */ 713 ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr); 714 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 715 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 716 ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr); 717 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 718 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 719 ierr = MatSetDM(A,da);CHKERRQ(ierr); 720 if (da->structure_only) { 721 ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr); 722 } 723 ierr = MatGetType(A,&Atype);CHKERRQ(ierr); 724 /* 725 We do not provide a getmatrix function in the DMDA operations because 726 the basic DMDA does not know about matrices. We think of DMDA as being more 727 more low-level than matrices. This is kind of cheating but, cause sometimes 728 we think of DMDA has higher level than matrices. 729 730 We could switch based on Atype (or mtype), but we do not since the 731 specialized setting routines depend only on the particular preallocation 732 details of the matrix, not the type itself. 733 */ 734 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 735 if (!aij) { 736 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 737 } 738 if (!aij) { 739 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 740 if (!baij) { 741 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 742 } 743 if (!baij) { 744 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 745 if (!sbaij) { 746 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 747 } 748 if (!sbaij) { 749 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr); 750 if (!sell) { 751 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr); 752 } 753 } 754 if (!sell) { 755 ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr); 756 } 757 } 758 } 759 if (aij) { 760 if (dim == 1) { 761 if (dd->ofill) { 762 ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 763 } else { 764 DMBoundaryType bx; 765 PetscMPIInt size; 766 ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 767 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr); 768 if (size == 1 && bx == DM_BOUNDARY_NONE) { 769 ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr); 770 } else { 771 ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 772 } 773 } 774 } else if (dim == 2) { 775 if (dd->ofill) { 776 ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 777 } else { 778 ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 779 } 780 } else if (dim == 3) { 781 if (dd->ofill) { 782 ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 783 } else { 784 ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 785 } 786 } 787 } else if (baij) { 788 if (dim == 2) { 789 ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr); 790 } else if (dim == 3) { 791 ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr); 792 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 793 } else if (sbaij) { 794 if (dim == 2) { 795 ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr); 796 } else if (dim == 3) { 797 ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr); 798 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 799 } else if (sell) { 800 if (dim == 2) { 801 ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr); 802 } else if (dim == 3) { 803 ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr); 804 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 805 } else if (is) { 806 ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr); 807 } else { 808 ISLocalToGlobalMapping ltog; 809 810 ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr); 811 ierr = MatSetUp(A);CHKERRQ(ierr); 812 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 813 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr); 814 } 815 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr); 816 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr); 817 ierr = MatSetDM(A,da);CHKERRQ(ierr); 818 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 819 if (size > 1) { 820 /* change viewer to display matrix in natural ordering */ 821 ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr); 822 ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr); 823 } 824 *J = A; 825 PetscFunctionReturn(0); 826 } 827 828 /* ---------------------------------------------------------------------------------*/ 829 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 830 831 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 832 { 833 DM_DA *da = (DM_DA*)dm->data; 834 Mat lJ; 835 ISLocalToGlobalMapping ltog; 836 IS is_loc_filt, is_glob; 837 const PetscInt *e_loc,*idx; 838 PetscInt nel,nen,nv,dof,dim,*gidx,nb; 839 PetscBool flg; 840 PetscErrorCode ierr; 841 842 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 843 We need to filter the local indices that are represented through the DMDAGetElements decomposition 844 This is because the size of the local matrices in MATIS is the local size of the l2g map */ 845 PetscFunctionBegin; 846 dof = da->w; 847 dim = dm->dim; 848 849 ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr); 850 851 /* get local elements indices in local DMDA numbering */ 852 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 853 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 854 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 855 856 /* obtain a consistent local ordering for MATIS */ 857 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 858 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 859 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 860 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 861 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 862 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 863 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 864 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 865 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 866 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 867 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 868 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 869 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 870 871 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 872 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 873 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 874 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 875 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 876 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 877 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 878 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 879 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 880 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 881 ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 882 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 883 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 884 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 885 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 886 ierr = PetscFree(gidx);CHKERRQ(ierr); 887 888 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 889 flg = dm->prealloc_only; 890 dm->prealloc_only = PETSC_TRUE; 891 switch (dim) { 892 case 1: 893 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 894 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 895 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 896 break; 897 case 2: 898 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 899 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 900 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 901 break; 902 case 3: 903 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 904 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 906 break; 907 default: 908 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 909 } 910 dm->prealloc_only = flg; 911 PetscFunctionReturn(0); 912 } 913 914 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 915 { 916 PetscErrorCode ierr; 917 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 918 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 919 MPI_Comm comm; 920 PetscScalar *values; 921 DMBoundaryType bx,by; 922 ISLocalToGlobalMapping ltog; 923 DMDAStencilType st; 924 925 PetscFunctionBegin; 926 /* 927 nc - number of components per grid point 928 col - number of colors needed in one direction for single component problem 929 930 */ 931 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 932 col = 2*s + 1; 933 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 934 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 935 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 936 937 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 938 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 939 940 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 941 /* determine the matrix preallocation information */ 942 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 943 for (i=xs; i<xs+nx; i++) { 944 945 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 946 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 947 948 for (j=ys; j<ys+ny; j++) { 949 slot = i - gxs + gnx*(j - gys); 950 951 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 952 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 953 954 cnt = 0; 955 for (k=0; k<nc; k++) { 956 for (l=lstart; l<lend+1; l++) { 957 for (p=pstart; p<pend+1; p++) { 958 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 959 cols[cnt++] = k + nc*(slot + gnx*l + p); 960 } 961 } 962 } 963 rows[k] = k + nc*(slot); 964 } 965 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 966 } 967 } 968 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 969 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 970 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 971 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 972 973 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 974 975 /* 976 For each node in the grid: we get the neighbors in the local (on processor ordering 977 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 978 PETSc ordering. 979 */ 980 if (!da->prealloc_only) { 981 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 982 for (i=xs; i<xs+nx; i++) { 983 984 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 985 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 986 987 for (j=ys; j<ys+ny; j++) { 988 slot = i - gxs + gnx*(j - gys); 989 990 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 991 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 992 993 cnt = 0; 994 for (k=0; k<nc; k++) { 995 for (l=lstart; l<lend+1; l++) { 996 for (p=pstart; p<pend+1; p++) { 997 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 998 cols[cnt++] = k + nc*(slot + gnx*l + p); 999 } 1000 } 1001 } 1002 rows[k] = k + nc*(slot); 1003 } 1004 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1005 } 1006 } 1007 ierr = PetscFree(values);CHKERRQ(ierr); 1008 /* do not copy values to GPU since they are all zero and not yet needed there */ 1009 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1010 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1011 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1012 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1013 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1014 } 1015 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1020 { 1021 PetscErrorCode ierr; 1022 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1023 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1024 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1025 MPI_Comm comm; 1026 PetscScalar *values; 1027 DMBoundaryType bx,by,bz; 1028 ISLocalToGlobalMapping ltog; 1029 DMDAStencilType st; 1030 1031 PetscFunctionBegin; 1032 /* 1033 nc - number of components per grid point 1034 col - number of colors needed in one direction for single component problem 1035 1036 */ 1037 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1038 col = 2*s + 1; 1039 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1040 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1041 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1042 1043 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1044 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1045 1046 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1047 /* determine the matrix preallocation information */ 1048 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1049 for (i=xs; i<xs+nx; i++) { 1050 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1051 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1052 for (j=ys; j<ys+ny; j++) { 1053 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1054 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1055 for (k=zs; k<zs+nz; k++) { 1056 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1057 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1058 1059 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1060 1061 cnt = 0; 1062 for (l=0; l<nc; l++) { 1063 for (ii=istart; ii<iend+1; ii++) { 1064 for (jj=jstart; jj<jend+1; jj++) { 1065 for (kk=kstart; kk<kend+1; kk++) { 1066 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1067 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1068 } 1069 } 1070 } 1071 } 1072 rows[l] = l + nc*(slot); 1073 } 1074 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1075 } 1076 } 1077 } 1078 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1079 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1080 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1081 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1082 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1083 1084 /* 1085 For each node in the grid: we get the neighbors in the local (on processor ordering 1086 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1087 PETSc ordering. 1088 */ 1089 if (!da->prealloc_only) { 1090 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1091 for (i=xs; i<xs+nx; i++) { 1092 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1093 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1094 for (j=ys; j<ys+ny; j++) { 1095 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1096 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1097 for (k=zs; k<zs+nz; k++) { 1098 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1099 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1100 1101 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1102 1103 cnt = 0; 1104 for (l=0; l<nc; l++) { 1105 for (ii=istart; ii<iend+1; ii++) { 1106 for (jj=jstart; jj<jend+1; jj++) { 1107 for (kk=kstart; kk<kend+1; kk++) { 1108 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1109 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1110 } 1111 } 1112 } 1113 } 1114 rows[l] = l + nc*(slot); 1115 } 1116 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1117 } 1118 } 1119 } 1120 ierr = PetscFree(values);CHKERRQ(ierr); 1121 /* do not copy values to GPU since they are all zero and not yet needed there */ 1122 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1123 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1124 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1125 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1126 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1127 } 1128 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1133 { 1134 PetscErrorCode ierr; 1135 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1136 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1137 MPI_Comm comm; 1138 DMBoundaryType bx,by; 1139 ISLocalToGlobalMapping ltog,mltog; 1140 DMDAStencilType st; 1141 PetscBool removedups = PETSC_FALSE; 1142 1143 PetscFunctionBegin; 1144 /* 1145 nc - number of components per grid point 1146 col - number of colors needed in one direction for single component problem 1147 1148 */ 1149 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1150 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1151 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1152 } 1153 col = 2*s + 1; 1154 /* 1155 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1156 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1157 */ 1158 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1159 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1160 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1161 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1162 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1163 1164 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1165 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1166 1167 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1168 /* determine the matrix preallocation information */ 1169 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1170 for (i=xs; i<xs+nx; i++) { 1171 1172 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1173 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1174 1175 for (j=ys; j<ys+ny; j++) { 1176 slot = i - gxs + gnx*(j - gys); 1177 1178 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1179 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1180 1181 cnt = 0; 1182 for (k=0; k<nc; k++) { 1183 for (l=lstart; l<lend+1; l++) { 1184 for (p=pstart; p<pend+1; p++) { 1185 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1186 cols[cnt++] = k + nc*(slot + gnx*l + p); 1187 } 1188 } 1189 } 1190 rows[k] = k + nc*(slot); 1191 } 1192 if (removedups) { 1193 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1194 } else { 1195 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1196 } 1197 } 1198 } 1199 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1200 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1201 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1202 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1203 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1204 if (!mltog) { 1205 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1206 } 1207 1208 /* 1209 For each node in the grid: we get the neighbors in the local (on processor ordering 1210 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1211 PETSc ordering. 1212 */ 1213 if (!da->prealloc_only) { 1214 for (i=xs; i<xs+nx; i++) { 1215 1216 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1217 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1218 1219 for (j=ys; j<ys+ny; j++) { 1220 slot = i - gxs + gnx*(j - gys); 1221 1222 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1223 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1224 1225 cnt = 0; 1226 for (l=lstart; l<lend+1; l++) { 1227 for (p=pstart; p<pend+1; p++) { 1228 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1229 cols[cnt++] = nc*(slot + gnx*l + p); 1230 for (k=1; k<nc; k++) { 1231 cols[cnt] = 1 + cols[cnt-1];cnt++; 1232 } 1233 } 1234 } 1235 } 1236 for (k=0; k<nc; k++) rows[k] = k + nc*(slot); 1237 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1238 } 1239 } 1240 /* do not copy values to GPU since they are all zero and not yet needed there */ 1241 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1242 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1243 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1244 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1245 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1246 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1247 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1248 } 1249 } 1250 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1255 { 1256 PetscErrorCode ierr; 1257 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1258 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1259 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1260 DM_DA *dd = (DM_DA*)da->data; 1261 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1262 MPI_Comm comm; 1263 DMBoundaryType bx,by; 1264 ISLocalToGlobalMapping ltog; 1265 DMDAStencilType st; 1266 PetscBool removedups = PETSC_FALSE; 1267 1268 PetscFunctionBegin; 1269 /* 1270 nc - number of components per grid point 1271 col - number of colors needed in one direction for single component problem 1272 1273 */ 1274 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1275 col = 2*s + 1; 1276 /* 1277 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1278 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1279 */ 1280 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1281 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1282 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1283 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1285 1286 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1287 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1288 1289 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1290 /* determine the matrix preallocation information */ 1291 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1292 for (i=xs; i<xs+nx; i++) { 1293 1294 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1295 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1296 1297 for (j=ys; j<ys+ny; j++) { 1298 slot = i - gxs + gnx*(j - gys); 1299 1300 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1301 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1302 1303 for (k=0; k<nc; k++) { 1304 cnt = 0; 1305 for (l=lstart; l<lend+1; l++) { 1306 for (p=pstart; p<pend+1; p++) { 1307 if (l || p) { 1308 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1309 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1310 } 1311 } else { 1312 if (dfill) { 1313 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1314 } else { 1315 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1316 } 1317 } 1318 } 1319 } 1320 row = k + nc*(slot); 1321 maxcnt = PetscMax(maxcnt,cnt); 1322 if (removedups) { 1323 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1324 } else { 1325 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1326 } 1327 } 1328 } 1329 } 1330 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1331 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1332 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1333 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1334 1335 /* 1336 For each node in the grid: we get the neighbors in the local (on processor ordering 1337 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1338 PETSc ordering. 1339 */ 1340 if (!da->prealloc_only) { 1341 for (i=xs; i<xs+nx; i++) { 1342 1343 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1344 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1345 1346 for (j=ys; j<ys+ny; j++) { 1347 slot = i - gxs + gnx*(j - gys); 1348 1349 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1350 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1351 1352 for (k=0; k<nc; k++) { 1353 cnt = 0; 1354 for (l=lstart; l<lend+1; l++) { 1355 for (p=pstart; p<pend+1; p++) { 1356 if (l || p) { 1357 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1358 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1359 } 1360 } else { 1361 if (dfill) { 1362 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1363 } else { 1364 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1365 } 1366 } 1367 } 1368 } 1369 row = k + nc*(slot); 1370 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1371 } 1372 } 1373 } 1374 /* do not copy values to GPU since they are all zero and not yet needed there */ 1375 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1376 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1378 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1379 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1380 } 1381 ierr = PetscFree(cols);CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 /* ---------------------------------------------------------------------------------*/ 1386 1387 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1388 { 1389 PetscErrorCode ierr; 1390 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1391 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1392 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1393 MPI_Comm comm; 1394 DMBoundaryType bx,by,bz; 1395 ISLocalToGlobalMapping ltog,mltog; 1396 DMDAStencilType st; 1397 PetscBool removedups = PETSC_FALSE; 1398 1399 PetscFunctionBegin; 1400 /* 1401 nc - number of components per grid point 1402 col - number of colors needed in one direction for single component problem 1403 1404 */ 1405 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1406 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1407 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1408 } 1409 col = 2*s + 1; 1410 1411 /* 1412 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1413 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1414 */ 1415 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1416 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1417 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1418 1419 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1420 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1421 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1422 1423 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1424 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1425 1426 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1427 /* determine the matrix preallocation information */ 1428 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1429 for (i=xs; i<xs+nx; i++) { 1430 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1431 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1432 for (j=ys; j<ys+ny; j++) { 1433 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1434 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1435 for (k=zs; k<zs+nz; k++) { 1436 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1437 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1438 1439 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1440 1441 cnt = 0; 1442 for (l=0; l<nc; l++) { 1443 for (ii=istart; ii<iend+1; ii++) { 1444 for (jj=jstart; jj<jend+1; jj++) { 1445 for (kk=kstart; kk<kend+1; kk++) { 1446 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1447 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1448 } 1449 } 1450 } 1451 } 1452 rows[l] = l + nc*(slot); 1453 } 1454 if (removedups) { 1455 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1456 } else { 1457 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1458 } 1459 } 1460 } 1461 } 1462 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1463 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1464 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1465 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1466 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1467 if (!mltog) { 1468 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1469 } 1470 1471 /* 1472 For each node in the grid: we get the neighbors in the local (on processor ordering 1473 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1474 PETSc ordering. 1475 */ 1476 if (!da->prealloc_only) { 1477 for (i=xs; i<xs+nx; i++) { 1478 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1479 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1480 for (j=ys; j<ys+ny; j++) { 1481 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1482 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1483 for (k=zs; k<zs+nz; k++) { 1484 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1485 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1486 1487 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1488 1489 cnt = 0; 1490 for (kk=kstart; kk<kend+1; kk++) { 1491 for (jj=jstart; jj<jend+1; jj++) { 1492 for (ii=istart; ii<iend+1; ii++) { 1493 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1494 cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk); 1495 for (l=1; l<nc; l++) { 1496 cols[cnt] = 1 + cols[cnt-1];cnt++; 1497 } 1498 } 1499 } 1500 } 1501 } 1502 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1503 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1504 } 1505 } 1506 } 1507 /* do not copy values to GPU since they are all zero and not yet needed there */ 1508 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1509 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1510 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1511 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1512 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1513 } 1514 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1515 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1516 } 1517 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1518 PetscFunctionReturn(0); 1519 } 1520 1521 /* ---------------------------------------------------------------------------------*/ 1522 1523 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1524 { 1525 PetscErrorCode ierr; 1526 DM_DA *dd = (DM_DA*)da->data; 1527 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1528 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1529 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1530 DMBoundaryType bx; 1531 ISLocalToGlobalMapping ltog; 1532 PetscMPIInt rank,size; 1533 1534 PetscFunctionBegin; 1535 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr); 1536 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr); 1537 1538 /* 1539 nc - number of components per grid point 1540 1541 */ 1542 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1543 if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1544 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1545 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1546 1547 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1548 ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr); 1549 1550 /* 1551 note should be smaller for first and last process with no periodic 1552 does not handle dfill 1553 */ 1554 cnt = 0; 1555 /* coupling with process to the left */ 1556 for (i=0; i<s; i++) { 1557 for (j=0; j<nc; j++) { 1558 ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1559 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1560 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1561 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1562 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1563 } 1564 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1565 cnt++; 1566 } 1567 } 1568 for (i=s; i<nx-s; i++) { 1569 for (j=0; j<nc; j++) { 1570 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1571 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1572 cnt++; 1573 } 1574 } 1575 /* coupling with process to the right */ 1576 for (i=nx-s; i<nx; i++) { 1577 for (j=0; j<nc; j++) { 1578 ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1579 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1580 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1581 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1582 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1583 } 1584 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1585 cnt++; 1586 } 1587 } 1588 1589 ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr); 1590 ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr); 1591 ierr = PetscFree2(cols,ocols);CHKERRQ(ierr); 1592 1593 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1594 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1595 1596 /* 1597 For each node in the grid: we get the neighbors in the local (on processor ordering 1598 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1599 PETSc ordering. 1600 */ 1601 if (!da->prealloc_only) { 1602 ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr); 1603 row = xs*nc; 1604 /* coupling with process to the left */ 1605 for (i=xs; i<xs+s; i++) { 1606 for (j=0; j<nc; j++) { 1607 cnt = 0; 1608 if (rank) { 1609 for (l=0; l<s; l++) { 1610 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1611 } 1612 } 1613 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1614 for (l=0; l<s; l++) { 1615 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1616 } 1617 } 1618 if (dfill) { 1619 for (k=dfill[j]; k<dfill[j+1]; k++) { 1620 cols[cnt++] = i*nc + dfill[k]; 1621 } 1622 } else { 1623 for (k=0; k<nc; k++) { 1624 cols[cnt++] = i*nc + k; 1625 } 1626 } 1627 for (l=0; l<s; l++) { 1628 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1629 } 1630 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1631 row++; 1632 } 1633 } 1634 for (i=xs+s; i<xs+nx-s; i++) { 1635 for (j=0; j<nc; j++) { 1636 cnt = 0; 1637 for (l=0; l<s; l++) { 1638 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1639 } 1640 if (dfill) { 1641 for (k=dfill[j]; k<dfill[j+1]; k++) { 1642 cols[cnt++] = i*nc + dfill[k]; 1643 } 1644 } else { 1645 for (k=0; k<nc; k++) { 1646 cols[cnt++] = i*nc + k; 1647 } 1648 } 1649 for (l=0; l<s; l++) { 1650 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1651 } 1652 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1653 row++; 1654 } 1655 } 1656 /* coupling with process to the right */ 1657 for (i=xs+nx-s; i<xs+nx; i++) { 1658 for (j=0; j<nc; j++) { 1659 cnt = 0; 1660 for (l=0; l<s; l++) { 1661 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1662 } 1663 if (dfill) { 1664 for (k=dfill[j]; k<dfill[j+1]; k++) { 1665 cols[cnt++] = i*nc + dfill[k]; 1666 } 1667 } else { 1668 for (k=0; k<nc; k++) { 1669 cols[cnt++] = i*nc + k; 1670 } 1671 } 1672 if (rank < size-1) { 1673 for (l=0; l<s; l++) { 1674 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1675 } 1676 } 1677 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1678 for (l=0; l<s; l++) { 1679 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1680 } 1681 } 1682 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1683 row++; 1684 } 1685 } 1686 ierr = PetscFree(cols);CHKERRQ(ierr); 1687 /* do not copy values to GPU since they are all zero and not yet needed there */ 1688 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1689 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1692 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 /* ---------------------------------------------------------------------------------*/ 1698 1699 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1700 { 1701 PetscErrorCode ierr; 1702 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1703 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1704 PetscInt istart,iend; 1705 DMBoundaryType bx; 1706 ISLocalToGlobalMapping ltog,mltog; 1707 1708 PetscFunctionBegin; 1709 /* 1710 nc - number of components per grid point 1711 col - number of colors needed in one direction for single component problem 1712 1713 */ 1714 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1715 if (!isIS && bx == DM_BOUNDARY_NONE) { 1716 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1717 } 1718 col = 2*s + 1; 1719 1720 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1721 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1722 1723 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1724 ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr); 1725 ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr); 1726 1727 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1728 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1729 if (!mltog) { 1730 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1731 } 1732 1733 /* 1734 For each node in the grid: we get the neighbors in the local (on processor ordering 1735 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1736 PETSc ordering. 1737 */ 1738 if (!da->prealloc_only) { 1739 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1740 for (i=xs; i<xs+nx; i++) { 1741 istart = PetscMax(-s,gxs - i); 1742 iend = PetscMin(s,gxs + gnx - i - 1); 1743 slot = i - gxs; 1744 1745 cnt = 0; 1746 for (i1=istart; i1<iend+1; i1++) { 1747 cols[cnt++] = nc*(slot + i1); 1748 for (l=1; l<nc; l++) { 1749 cols[cnt] = 1 + cols[cnt-1];cnt++; 1750 } 1751 } 1752 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1753 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1754 } 1755 /* do not copy values to GPU since they are all zero and not yet needed there */ 1756 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1757 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1758 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1759 if (!isIS && bx == DM_BOUNDARY_NONE) { 1760 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1761 } 1762 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1763 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1764 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1765 } 1766 PetscFunctionReturn(0); 1767 } 1768 1769 /* ---------------------------------------------------------------------------------*/ 1770 1771 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS) 1772 { 1773 PetscErrorCode ierr; 1774 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1775 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1776 PetscInt istart,iend; 1777 DMBoundaryType bx; 1778 ISLocalToGlobalMapping ltog,mltog; 1779 1780 PetscFunctionBegin; 1781 /* 1782 nc - number of components per grid point 1783 col - number of colors needed in one direction for single component problem 1784 */ 1785 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1786 col = 2*s + 1; 1787 1788 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1789 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1790 1791 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1792 ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr); 1793 1794 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1795 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1796 if (!mltog) { 1797 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1798 } 1799 1800 /* 1801 For each node in the grid: we get the neighbors in the local (on processor ordering 1802 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1803 PETSc ordering. 1804 */ 1805 if (!da->prealloc_only) { 1806 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1807 for (i=xs; i<xs+nx; i++) { 1808 istart = PetscMax(-s,gxs - i); 1809 iend = PetscMin(s,gxs + gnx - i - 1); 1810 slot = i - gxs; 1811 1812 cnt = 0; 1813 for (i1=istart; i1<iend+1; i1++) { 1814 cols[cnt++] = nc*(slot + i1); 1815 for (l=1; l<nc; l++) { 1816 cols[cnt] = 1 + cols[cnt-1];cnt++; 1817 } 1818 } 1819 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1820 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1821 } 1822 /* do not copy values to GPU since they are all zero and not yet needed there */ 1823 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1824 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1825 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1826 if (!isIS && bx == DM_BOUNDARY_NONE) { 1827 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1828 } 1829 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1830 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1831 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1832 } 1833 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 1837 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1838 { 1839 PetscErrorCode ierr; 1840 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1841 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1842 PetscInt istart,iend,jstart,jend,ii,jj; 1843 MPI_Comm comm; 1844 PetscScalar *values; 1845 DMBoundaryType bx,by; 1846 DMDAStencilType st; 1847 ISLocalToGlobalMapping ltog; 1848 1849 PetscFunctionBegin; 1850 /* 1851 nc - number of components per grid point 1852 col - number of colors needed in one direction for single component problem 1853 */ 1854 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1855 col = 2*s + 1; 1856 1857 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1858 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1859 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1860 1861 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 1862 1863 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1864 1865 /* determine the matrix preallocation information */ 1866 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 1867 for (i=xs; i<xs+nx; i++) { 1868 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1869 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1870 for (j=ys; j<ys+ny; j++) { 1871 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1872 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1873 slot = i - gxs + gnx*(j - gys); 1874 1875 /* Find block columns in block row */ 1876 cnt = 0; 1877 for (ii=istart; ii<iend+1; ii++) { 1878 for (jj=jstart; jj<jend+1; jj++) { 1879 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1880 cols[cnt++] = slot + ii + gnx*jj; 1881 } 1882 } 1883 } 1884 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1885 } 1886 } 1887 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1888 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1889 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1890 1891 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1892 1893 /* 1894 For each node in the grid: we get the neighbors in the local (on processor ordering 1895 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1896 PETSc ordering. 1897 */ 1898 if (!da->prealloc_only) { 1899 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1900 for (i=xs; i<xs+nx; i++) { 1901 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1902 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1903 for (j=ys; j<ys+ny; j++) { 1904 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1905 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1906 slot = i - gxs + gnx*(j - gys); 1907 cnt = 0; 1908 for (ii=istart; ii<iend+1; ii++) { 1909 for (jj=jstart; jj<jend+1; jj++) { 1910 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1911 cols[cnt++] = slot + ii + gnx*jj; 1912 } 1913 } 1914 } 1915 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1916 } 1917 } 1918 ierr = PetscFree(values);CHKERRQ(ierr); 1919 /* do not copy values to GPU since they are all zero and not yet needed there */ 1920 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1921 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1922 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1923 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1924 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1925 } 1926 ierr = PetscFree(cols);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1931 { 1932 PetscErrorCode ierr; 1933 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1934 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1935 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1936 MPI_Comm comm; 1937 PetscScalar *values; 1938 DMBoundaryType bx,by,bz; 1939 DMDAStencilType st; 1940 ISLocalToGlobalMapping ltog; 1941 1942 PetscFunctionBegin; 1943 /* 1944 nc - number of components per grid point 1945 col - number of colors needed in one direction for single component problem 1946 1947 */ 1948 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1949 col = 2*s + 1; 1950 1951 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1952 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1953 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1954 1955 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 1956 1957 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1958 1959 /* determine the matrix preallocation information */ 1960 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1961 for (i=xs; i<xs+nx; i++) { 1962 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1963 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1964 for (j=ys; j<ys+ny; j++) { 1965 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1966 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1967 for (k=zs; k<zs+nz; k++) { 1968 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1969 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1970 1971 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1972 1973 /* Find block columns in block row */ 1974 cnt = 0; 1975 for (ii=istart; ii<iend+1; ii++) { 1976 for (jj=jstart; jj<jend+1; jj++) { 1977 for (kk=kstart; kk<kend+1; kk++) { 1978 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1979 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1980 } 1981 } 1982 } 1983 } 1984 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1985 } 1986 } 1987 } 1988 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1989 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1990 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1991 1992 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1993 1994 /* 1995 For each node in the grid: we get the neighbors in the local (on processor ordering 1996 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1997 PETSc ordering. 1998 */ 1999 if (!da->prealloc_only) { 2000 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2001 for (i=xs; i<xs+nx; i++) { 2002 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2003 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2004 for (j=ys; j<ys+ny; j++) { 2005 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2006 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2007 for (k=zs; k<zs+nz; k++) { 2008 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2009 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2010 2011 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2012 2013 cnt = 0; 2014 for (ii=istart; ii<iend+1; ii++) { 2015 for (jj=jstart; jj<jend+1; jj++) { 2016 for (kk=kstart; kk<kend+1; kk++) { 2017 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2018 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2019 } 2020 } 2021 } 2022 } 2023 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2024 } 2025 } 2026 } 2027 ierr = PetscFree(values);CHKERRQ(ierr); 2028 /* do not copy values to GPU since they are all zero and not yet needed there */ 2029 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2030 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2031 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2033 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2034 } 2035 ierr = PetscFree(cols);CHKERRQ(ierr); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 /* 2040 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2041 identify in the local ordering with periodic domain. 2042 */ 2043 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2044 { 2045 PetscErrorCode ierr; 2046 PetscInt i,n; 2047 2048 PetscFunctionBegin; 2049 ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr); 2050 ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr); 2051 for (i=0,n=0; i<*cnt; i++) { 2052 if (col[i] >= *row) col[n++] = col[i]; 2053 } 2054 *cnt = n; 2055 PetscFunctionReturn(0); 2056 } 2057 2058 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2059 { 2060 PetscErrorCode ierr; 2061 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2062 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2063 PetscInt istart,iend,jstart,jend,ii,jj; 2064 MPI_Comm comm; 2065 PetscScalar *values; 2066 DMBoundaryType bx,by; 2067 DMDAStencilType st; 2068 ISLocalToGlobalMapping ltog; 2069 2070 PetscFunctionBegin; 2071 /* 2072 nc - number of components per grid point 2073 col - number of colors needed in one direction for single component problem 2074 */ 2075 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 2076 col = 2*s + 1; 2077 2078 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 2079 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 2080 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2081 2082 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 2083 2084 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2085 2086 /* determine the matrix preallocation information */ 2087 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2088 for (i=xs; i<xs+nx; i++) { 2089 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2090 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2091 for (j=ys; j<ys+ny; j++) { 2092 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2093 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2094 slot = i - gxs + gnx*(j - gys); 2095 2096 /* Find block columns in block row */ 2097 cnt = 0; 2098 for (ii=istart; ii<iend+1; ii++) { 2099 for (jj=jstart; jj<jend+1; jj++) { 2100 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2101 cols[cnt++] = slot + ii + gnx*jj; 2102 } 2103 } 2104 } 2105 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2106 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2107 } 2108 } 2109 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2110 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2111 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2112 2113 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2114 2115 /* 2116 For each node in the grid: we get the neighbors in the local (on processor ordering 2117 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2118 PETSc ordering. 2119 */ 2120 if (!da->prealloc_only) { 2121 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 2122 for (i=xs; i<xs+nx; i++) { 2123 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2124 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2125 for (j=ys; j<ys+ny; j++) { 2126 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2127 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2128 slot = i - gxs + gnx*(j - gys); 2129 2130 /* Find block columns in block row */ 2131 cnt = 0; 2132 for (ii=istart; ii<iend+1; ii++) { 2133 for (jj=jstart; jj<jend+1; jj++) { 2134 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2135 cols[cnt++] = slot + ii + gnx*jj; 2136 } 2137 } 2138 } 2139 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2140 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2141 } 2142 } 2143 ierr = PetscFree(values);CHKERRQ(ierr); 2144 /* do not copy values to GPU since they are all zero and not yet needed there */ 2145 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2146 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2147 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2148 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2149 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2150 } 2151 ierr = PetscFree(cols);CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2156 { 2157 PetscErrorCode ierr; 2158 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2159 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2160 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2161 MPI_Comm comm; 2162 PetscScalar *values; 2163 DMBoundaryType bx,by,bz; 2164 DMDAStencilType st; 2165 ISLocalToGlobalMapping ltog; 2166 2167 PetscFunctionBegin; 2168 /* 2169 nc - number of components per grid point 2170 col - number of colors needed in one direction for single component problem 2171 */ 2172 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2173 col = 2*s + 1; 2174 2175 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2176 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2177 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2178 2179 /* create the matrix */ 2180 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 2181 2182 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2183 2184 /* determine the matrix preallocation information */ 2185 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2186 for (i=xs; i<xs+nx; i++) { 2187 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2188 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2189 for (j=ys; j<ys+ny; j++) { 2190 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2191 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2192 for (k=zs; k<zs+nz; k++) { 2193 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2194 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2195 2196 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2197 2198 /* Find block columns in block row */ 2199 cnt = 0; 2200 for (ii=istart; ii<iend+1; ii++) { 2201 for (jj=jstart; jj<jend+1; jj++) { 2202 for (kk=kstart; kk<kend+1; kk++) { 2203 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2204 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2205 } 2206 } 2207 } 2208 } 2209 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2210 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2211 } 2212 } 2213 } 2214 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2215 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2216 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2217 2218 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2219 2220 /* 2221 For each node in the grid: we get the neighbors in the local (on processor ordering 2222 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2223 PETSc ordering. 2224 */ 2225 if (!da->prealloc_only) { 2226 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2227 for (i=xs; i<xs+nx; i++) { 2228 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2229 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2230 for (j=ys; j<ys+ny; j++) { 2231 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2232 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2233 for (k=zs; k<zs+nz; k++) { 2234 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2235 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2236 2237 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2238 2239 cnt = 0; 2240 for (ii=istart; ii<iend+1; ii++) { 2241 for (jj=jstart; jj<jend+1; jj++) { 2242 for (kk=kstart; kk<kend+1; kk++) { 2243 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2244 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2245 } 2246 } 2247 } 2248 } 2249 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2250 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2251 } 2252 } 2253 } 2254 ierr = PetscFree(values);CHKERRQ(ierr); 2255 /* do not copy values to GPU since they are all zero and not yet needed there */ 2256 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2257 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2258 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2259 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2260 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2261 } 2262 ierr = PetscFree(cols);CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 /* ---------------------------------------------------------------------------------*/ 2267 2268 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2269 { 2270 PetscErrorCode ierr; 2271 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2272 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2273 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2274 DM_DA *dd = (DM_DA*)da->data; 2275 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2276 MPI_Comm comm; 2277 PetscScalar *values; 2278 DMBoundaryType bx,by,bz; 2279 ISLocalToGlobalMapping ltog; 2280 DMDAStencilType st; 2281 PetscBool removedups = PETSC_FALSE; 2282 2283 PetscFunctionBegin; 2284 /* 2285 nc - number of components per grid point 2286 col - number of colors needed in one direction for single component problem 2287 2288 */ 2289 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2290 col = 2*s + 1; 2291 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2292 by 2*stencil_width + 1\n"); 2293 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2294 by 2*stencil_width + 1\n"); 2295 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2296 by 2*stencil_width + 1\n"); 2297 2298 /* 2299 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2300 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2301 */ 2302 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2303 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2304 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2305 2306 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2307 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2308 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2309 2310 ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr); 2311 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2312 2313 /* determine the matrix preallocation information */ 2314 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2315 2316 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 2317 for (i=xs; i<xs+nx; i++) { 2318 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2319 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2320 for (j=ys; j<ys+ny; j++) { 2321 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2322 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2323 for (k=zs; k<zs+nz; k++) { 2324 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2325 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2326 2327 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2328 2329 for (l=0; l<nc; l++) { 2330 cnt = 0; 2331 for (ii=istart; ii<iend+1; ii++) { 2332 for (jj=jstart; jj<jend+1; jj++) { 2333 for (kk=kstart; kk<kend+1; kk++) { 2334 if (ii || jj || kk) { 2335 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2336 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2337 } 2338 } else { 2339 if (dfill) { 2340 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2341 } else { 2342 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2343 } 2344 } 2345 } 2346 } 2347 } 2348 row = l + nc*(slot); 2349 maxcnt = PetscMax(maxcnt,cnt); 2350 if (removedups) { 2351 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2352 } else { 2353 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2354 } 2355 } 2356 } 2357 } 2358 } 2359 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 2360 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 2361 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2362 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2363 2364 /* 2365 For each node in the grid: we get the neighbors in the local (on processor ordering 2366 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2367 PETSc ordering. 2368 */ 2369 if (!da->prealloc_only) { 2370 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 2371 for (i=xs; i<xs+nx; i++) { 2372 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2373 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2374 for (j=ys; j<ys+ny; j++) { 2375 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2376 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2377 for (k=zs; k<zs+nz; k++) { 2378 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2379 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2380 2381 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2382 2383 for (l=0; l<nc; l++) { 2384 cnt = 0; 2385 for (ii=istart; ii<iend+1; ii++) { 2386 for (jj=jstart; jj<jend+1; jj++) { 2387 for (kk=kstart; kk<kend+1; kk++) { 2388 if (ii || jj || kk) { 2389 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2390 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2391 } 2392 } else { 2393 if (dfill) { 2394 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2395 } else { 2396 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2397 } 2398 } 2399 } 2400 } 2401 } 2402 row = l + nc*(slot); 2403 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2404 } 2405 } 2406 } 2407 } 2408 ierr = PetscFree(values);CHKERRQ(ierr); 2409 /* do not copy values to GPU since they are all zero and not yet needed there */ 2410 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2411 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2412 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2413 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2414 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2415 } 2416 ierr = PetscFree(cols);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419