1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 17 { 18 PetscErrorCode ierr; 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 53 { 54 PetscErrorCode ierr; 55 PetscInt nz; 56 57 PetscFunctionBegin; 58 if (!dfillsparse) PetscFunctionReturn(0); 59 60 /* Determine number of non-zeros */ 61 nz = (dfillsparse[w] - w - 1); 62 63 /* Allocate space for our copy of the given sparse matrix representation. */ 64 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 65 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 71 { 72 PetscErrorCode ierr; 73 PetscInt i,k,cnt = 1; 74 75 PetscFunctionBegin; 76 77 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 78 columns to the left with any nonzeros in them plus 1 */ 79 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 80 for (i=0; i<dd->w; i++) { 81 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 82 } 83 for (i=0; i<dd->w; i++) { 84 if (dd->ofillcols[i]) { 85 dd->ofillcols[i] = cnt++; 86 } 87 } 88 PetscFunctionReturn(0); 89 } 90 91 92 93 /*@ 94 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 95 of the matrix returned by DMCreateMatrix(). 96 97 Logically Collective on da 98 99 Input Parameter: 100 + da - the distributed array 101 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 102 - ofill - the fill pattern in the off-diagonal blocks 103 104 105 Level: developer 106 107 Notes: 108 This only makes sense when you are doing multicomponent problems but using the 109 MPIAIJ matrix format 110 111 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 112 representing coupling and 0 entries for missing coupling. For example 113 $ dfill[9] = {1, 0, 0, 114 $ 1, 1, 0, 115 $ 0, 1, 1} 116 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 117 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 118 diagonal block). 119 120 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 121 can be represented in the dfill, ofill format 122 123 Contributed by Glenn Hammond 124 125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 126 127 @*/ 128 PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill) 129 { 130 DM_DA *dd = (DM_DA*)da->data; 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 /* save the given dfill and ofill information */ 135 ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr); 136 ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr); 137 138 /* count nonzeros in ofill columns */ 139 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 140 141 PetscFunctionReturn(0); 142 } 143 144 145 /*@ 146 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 147 of the matrix returned by DMCreateMatrix(), using sparse representations 148 of fill patterns. 149 150 Logically Collective on da 151 152 Input Parameter: 153 + da - the distributed array 154 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 155 - ofill - the sparse fill pattern in the off-diagonal blocks 156 157 158 Level: developer 159 160 Notes: This only makes sense when you are doing multicomponent problems but using the 161 MPIAIJ matrix format 162 163 The format for dfill and ofill is a sparse representation of a 164 dof-by-dof matrix with 1 entries representing coupling and 0 entries 165 for missing coupling. The sparse representation is a 1 dimensional 166 array of length nz + dof + 1, where nz is the number of non-zeros in 167 the matrix. The first dof entries in the array give the 168 starting array indices of each row's items in the rest of the array, 169 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 170 and the remaining nz items give the column indices of each of 171 the 1s within the logical 2D matrix. Each row's items within 172 the array are the column indices of the 1s within that row 173 of the 2D matrix. PETSc developers may recognize that this is the 174 same format as that computed by the DMDASetBlockFills_Private() 175 function from a dense 2D matrix representation. 176 177 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 178 can be represented in the dfill, ofill format 179 180 Contributed by Philip C. Roth 181 182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 183 184 @*/ 185 PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse) 186 { 187 DM_DA *dd = (DM_DA*)da->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 /* save the given dfill and ofill information */ 192 ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr); 193 ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr); 194 195 /* count nonzeros in ofill columns */ 196 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 197 198 PetscFunctionReturn(0); 199 } 200 201 202 PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring) 203 { 204 PetscErrorCode ierr; 205 PetscInt dim,m,n,p,nc; 206 DMBoundaryType bx,by,bz; 207 MPI_Comm comm; 208 PetscMPIInt size; 209 PetscBool isBAIJ; 210 DM_DA *dd = (DM_DA*)da->data; 211 212 PetscFunctionBegin; 213 /* 214 m 215 ------------------------------------------------------ 216 | | 217 | | 218 | ---------------------- | 219 | | | | 220 n | yn | | | 221 | | | | 222 | .--------------------- | 223 | (xs,ys) xn | 224 | . | 225 | (gxs,gys) | 226 | | 227 ----------------------------------------------------- 228 */ 229 230 /* 231 nc - number of components per grid point 232 col - number of colors needed in one direction for single component problem 233 234 */ 235 ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr); 236 237 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 238 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 239 if (ctype == IS_COLORING_LOCAL) { 240 if (size == 1) { 241 ctype = IS_COLORING_GLOBAL; 242 } else if (dim > 1) { 243 if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) { 244 SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 245 } 246 } 247 } 248 249 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 250 matrices is for the blocks, not the individual matrix elements */ 251 ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr); 252 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);} 253 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);} 254 if (isBAIJ) { 255 dd->w = 1; 256 dd->xs = dd->xs/nc; 257 dd->xe = dd->xe/nc; 258 dd->Xs = dd->Xs/nc; 259 dd->Xe = dd->Xe/nc; 260 } 261 262 /* 263 We do not provide a getcoloring function in the DMDA operations because 264 the basic DMDA does not know about matrices. We think of DMDA as being 265 more low-level then matrices. 266 */ 267 if (dim == 1) { 268 ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 269 } else if (dim == 2) { 270 ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 271 } else if (dim == 3) { 272 ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 273 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); 274 if (isBAIJ) { 275 dd->w = nc; 276 dd->xs = dd->xs*nc; 277 dd->xe = dd->xe*nc; 278 dd->Xs = dd->Xs*nc; 279 dd->Xe = dd->Xe*nc; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 /* ---------------------------------------------------------------------------------*/ 285 286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 287 { 288 PetscErrorCode ierr; 289 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col; 290 PetscInt ncolors; 291 MPI_Comm comm; 292 DMBoundaryType bx,by; 293 DMDAStencilType st; 294 ISColoringValue *colors; 295 DM_DA *dd = (DM_DA*)da->data; 296 297 PetscFunctionBegin; 298 /* 299 nc - number of components per grid point 300 col - number of colors needed in one direction for single component problem 301 302 */ 303 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 304 col = 2*s + 1; 305 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 306 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 307 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 308 309 /* special case as taught to us by Paul Hovland */ 310 if (st == DMDA_STENCIL_STAR && s == 1) { 311 ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 312 } else { 313 if (ctype == IS_COLORING_GLOBAL) { 314 if (!dd->localcoloring) { 315 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 316 ii = 0; 317 for (j=ys; j<ys+ny; j++) { 318 for (i=xs; i<xs+nx; i++) { 319 for (k=0; k<nc; k++) { 320 colors[ii++] = k + nc*((i % col) + col*(j % col)); 321 } 322 } 323 } 324 ncolors = nc + nc*(col-1 + col*(col-1)); 325 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 326 } 327 *coloring = dd->localcoloring; 328 } else if (ctype == IS_COLORING_LOCAL) { 329 if (!dd->ghostedcoloring) { 330 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 331 ii = 0; 332 for (j=gys; j<gys+gny; j++) { 333 for (i=gxs; i<gxs+gnx; i++) { 334 for (k=0; k<nc; k++) { 335 /* the complicated stuff is to handle periodic boundaries */ 336 colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col)); 337 } 338 } 339 } 340 ncolors = nc + nc*(col - 1 + col*(col-1)); 341 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 342 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 343 344 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 345 } 346 *coloring = dd->ghostedcoloring; 347 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 348 } 349 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 /* ---------------------------------------------------------------------------------*/ 354 355 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 356 { 357 PetscErrorCode ierr; 358 PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P; 359 PetscInt ncolors; 360 MPI_Comm comm; 361 DMBoundaryType bx,by,bz; 362 DMDAStencilType st; 363 ISColoringValue *colors; 364 DM_DA *dd = (DM_DA*)da->data; 365 366 PetscFunctionBegin; 367 /* 368 nc - number of components per grid point 369 col - number of colors needed in one direction for single component problem 370 371 */ 372 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 373 col = 2*s + 1; 374 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 375 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 376 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 377 378 /* create the coloring */ 379 if (ctype == IS_COLORING_GLOBAL) { 380 if (!dd->localcoloring) { 381 ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr); 382 ii = 0; 383 for (k=zs; k<zs+nz; k++) { 384 for (j=ys; j<ys+ny; j++) { 385 for (i=xs; i<xs+nx; i++) { 386 for (l=0; l<nc; l++) { 387 colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col)); 388 } 389 } 390 } 391 } 392 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 393 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 394 } 395 *coloring = dd->localcoloring; 396 } else if (ctype == IS_COLORING_LOCAL) { 397 if (!dd->ghostedcoloring) { 398 ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr); 399 ii = 0; 400 for (k=gzs; k<gzs+gnz; k++) { 401 for (j=gys; j<gys+gny; j++) { 402 for (i=gxs; i<gxs+gnx; i++) { 403 for (l=0; l<nc; l++) { 404 /* the complicated stuff is to handle periodic boundaries */ 405 colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col)); 406 } 407 } 408 } 409 } 410 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 411 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 412 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 413 } 414 *coloring = dd->ghostedcoloring; 415 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 416 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* ---------------------------------------------------------------------------------*/ 421 422 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 423 { 424 PetscErrorCode ierr; 425 PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col; 426 PetscInt ncolors; 427 MPI_Comm comm; 428 DMBoundaryType bx; 429 ISColoringValue *colors; 430 DM_DA *dd = (DM_DA*)da->data; 431 432 PetscFunctionBegin; 433 /* 434 nc - number of components per grid point 435 col - number of colors needed in one direction for single component problem 436 437 */ 438 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 439 col = 2*s + 1; 440 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 441 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 442 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 443 444 /* create the coloring */ 445 if (ctype == IS_COLORING_GLOBAL) { 446 if (!dd->localcoloring) { 447 ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr); 448 if (dd->ofillcols) { 449 PetscInt tc = 0; 450 for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0); 451 i1 = 0; 452 for (i=xs; i<xs+nx; i++) { 453 for (l=0; l<nc; l++) { 454 if (dd->ofillcols[l] && (i % col)) { 455 colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; 456 } else { 457 colors[i1++] = l; 458 } 459 } 460 } 461 ncolors = nc + 2*s*tc; 462 } else { 463 i1 = 0; 464 for (i=xs; i<xs+nx; i++) { 465 for (l=0; l<nc; l++) { 466 colors[i1++] = l + nc*(i % col); 467 } 468 } 469 ncolors = nc + nc*(col-1); 470 } 471 ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 472 } 473 *coloring = dd->localcoloring; 474 } else if (ctype == IS_COLORING_LOCAL) { 475 if (!dd->ghostedcoloring) { 476 ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr); 477 i1 = 0; 478 for (i=gxs; i<gxs+gnx; i++) { 479 for (l=0; l<nc; l++) { 480 /* the complicated stuff is to handle periodic boundaries */ 481 colors[i1++] = l + nc*(SetInRange(i,m) % col); 482 } 483 } 484 ncolors = nc + nc*(col-1); 485 ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 486 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 487 } 488 *coloring = dd->ghostedcoloring; 489 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 490 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 494 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 495 { 496 PetscErrorCode ierr; 497 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc; 498 PetscInt ncolors; 499 MPI_Comm comm; 500 DMBoundaryType bx,by; 501 ISColoringValue *colors; 502 DM_DA *dd = (DM_DA*)da->data; 503 504 PetscFunctionBegin; 505 /* 506 nc - number of components per grid point 507 col - number of colors needed in one direction for single component problem 508 509 */ 510 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr); 511 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 512 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 513 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 514 /* create the coloring */ 515 if (ctype == IS_COLORING_GLOBAL) { 516 if (!dd->localcoloring) { 517 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 518 ii = 0; 519 for (j=ys; j<ys+ny; j++) { 520 for (i=xs; i<xs+nx; i++) { 521 for (k=0; k<nc; k++) { 522 colors[ii++] = k + nc*((3*j+i) % 5); 523 } 524 } 525 } 526 ncolors = 5*nc; 527 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 528 } 529 *coloring = dd->localcoloring; 530 } else if (ctype == IS_COLORING_LOCAL) { 531 if (!dd->ghostedcoloring) { 532 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 533 ii = 0; 534 for (j=gys; j<gys+gny; j++) { 535 for (i=gxs; i<gxs+gnx; i++) { 536 for (k=0; k<nc; k++) { 537 colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); 538 } 539 } 540 } 541 ncolors = 5*nc; 542 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 543 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 544 } 545 *coloring = dd->ghostedcoloring; 546 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 547 PetscFunctionReturn(0); 548 } 549 550 /* =========================================================================== */ 551 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool); 552 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 553 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool); 554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool); 555 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 556 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool); 557 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 558 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 559 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 560 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 561 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 562 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 563 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 564 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 565 566 /*@C 567 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 568 569 Logically Collective on mat 570 571 Input Parameters: 572 + mat - the matrix 573 - da - the da 574 575 Level: intermediate 576 577 @*/ 578 PetscErrorCode MatSetupDM(Mat mat,DM da) 579 { 580 PetscErrorCode ierr; 581 582 PetscFunctionBegin; 583 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 584 PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA); 585 ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 590 { 591 DM da; 592 PetscErrorCode ierr; 593 const char *prefix; 594 Mat Anatural; 595 AO ao; 596 PetscInt rstart,rend,*petsc,i; 597 IS is; 598 MPI_Comm comm; 599 PetscViewerFormat format; 600 601 PetscFunctionBegin; 602 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 603 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 604 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 605 606 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 607 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 608 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 609 610 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 611 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 612 ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr); 613 for (i=rstart; i<rend; i++) petsc[i-rstart] = i; 614 ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr); 615 ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 616 617 /* call viewer on natural ordering */ 618 ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr); 619 ierr = ISDestroy(&is);CHKERRQ(ierr); 620 ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr); 621 ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr); 622 ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr); 623 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 624 ierr = MatView(Anatural,viewer);CHKERRQ(ierr); 625 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 626 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 630 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 631 { 632 DM da; 633 PetscErrorCode ierr; 634 Mat Anatural,Aapp; 635 AO ao; 636 PetscInt rstart,rend,*app,i,m,n,M,N; 637 IS is; 638 MPI_Comm comm; 639 640 PetscFunctionBegin; 641 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 642 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 643 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 644 645 /* Load the matrix in natural ordering */ 646 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr); 647 ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr); 648 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 649 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 650 ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr); 651 ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr); 652 653 /* Map natural ordering to application ordering and create IS */ 654 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 655 ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr); 656 ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr); 657 for (i=rstart; i<rend; i++) app[i-rstart] = i; 658 ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr); 659 ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 660 661 /* Do permutation and replace header */ 662 ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr); 663 ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr); 664 ierr = ISDestroy(&is);CHKERRQ(ierr); 665 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 670 { 671 PetscErrorCode ierr; 672 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 673 Mat A; 674 MPI_Comm comm; 675 MatType Atype; 676 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 677 MatType mtype; 678 PetscMPIInt size; 679 DM_DA *dd = (DM_DA*)da->data; 680 681 PetscFunctionBegin; 682 ierr = MatInitializePackage();CHKERRQ(ierr); 683 mtype = da->mattype; 684 685 /* 686 m 687 ------------------------------------------------------ 688 | | 689 | | 690 | ---------------------- | 691 | | | | 692 n | ny | | | 693 | | | | 694 | .--------------------- | 695 | (xs,ys) nx | 696 | . | 697 | (gxs,gys) | 698 | | 699 ----------------------------------------------------- 700 */ 701 702 /* 703 nc - number of components per grid point 704 col - number of colors needed in one direction for single component problem 705 706 */ 707 M = dd->M; 708 N = dd->N; 709 P = dd->P; 710 dim = da->dim; 711 dof = dd->w; 712 /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */ 713 ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr); 714 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 715 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 716 ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr); 717 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 718 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 719 ierr = MatSetDM(A,da);CHKERRQ(ierr); 720 if (da->structure_only) { 721 ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr); 722 } 723 ierr = MatGetType(A,&Atype);CHKERRQ(ierr); 724 /* 725 We do not provide a getmatrix function in the DMDA operations because 726 the basic DMDA does not know about matrices. We think of DMDA as being more 727 more low-level than matrices. This is kind of cheating but, cause sometimes 728 we think of DMDA has higher level than matrices. 729 730 We could switch based on Atype (or mtype), but we do not since the 731 specialized setting routines depend only on the particular preallocation 732 details of the matrix, not the type itself. 733 */ 734 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 735 if (!aij) { 736 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 737 } 738 if (!aij) { 739 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 740 if (!baij) { 741 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 742 } 743 if (!baij) { 744 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 745 if (!sbaij) { 746 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 747 } 748 if (!sbaij) { 749 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr); 750 if (!sell) { 751 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr); 752 } 753 } 754 if (!sell) { 755 ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr); 756 } 757 } 758 } 759 if (aij) { 760 if (dim == 1) { 761 if (dd->ofill) { 762 ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 763 } else { 764 DMBoundaryType bx; 765 PetscMPIInt size; 766 ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 767 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr); 768 if (size == 1 && bx == DM_BOUNDARY_NONE) { 769 ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr); 770 } else { 771 ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 772 } 773 } 774 } else if (dim == 2) { 775 if (dd->ofill) { 776 ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 777 } else { 778 ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 779 } 780 } else if (dim == 3) { 781 if (dd->ofill) { 782 ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 783 } else { 784 ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 785 } 786 } 787 } else if (baij) { 788 if (dim == 2) { 789 ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr); 790 } else if (dim == 3) { 791 ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr); 792 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 793 } else if (sbaij) { 794 if (dim == 2) { 795 ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr); 796 } else if (dim == 3) { 797 ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr); 798 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 799 } else if (sell) { 800 if (dim == 2) { 801 ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr); 802 } else if (dim == 3) { 803 ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr); 804 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 805 } else if (is) { 806 ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr); 807 } else { 808 ISLocalToGlobalMapping ltog; 809 810 ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr); 811 ierr = MatSetUp(A);CHKERRQ(ierr); 812 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 813 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr); 814 } 815 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr); 816 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr); 817 ierr = MatSetDM(A,da);CHKERRQ(ierr); 818 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 819 if (size > 1) { 820 /* change viewer to display matrix in natural ordering */ 821 ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr); 822 ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr); 823 } 824 *J = A; 825 PetscFunctionReturn(0); 826 } 827 828 /* ---------------------------------------------------------------------------------*/ 829 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 830 831 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 832 { 833 DM_DA *da = (DM_DA*)dm->data; 834 Mat lJ; 835 ISLocalToGlobalMapping ltog; 836 IS is_loc_filt, is_glob; 837 const PetscInt *e_loc,*idx; 838 PetscInt nel,nen,nv,dof,dim,*gidx,nb; 839 PetscBool flg; 840 PetscErrorCode ierr; 841 842 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 843 We need to filter the local indices that are represented through the DMDAGetElements decomposition 844 This is because the size of the local matrices in MATIS is the local size of the l2g map */ 845 PetscFunctionBegin; 846 dof = da->w; 847 dim = dm->dim; 848 849 ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr); 850 851 /* get local elements indices in local DMDA numbering */ 852 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 853 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 854 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 855 856 /* obtain a consistent local ordering for MATIS */ 857 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 858 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 859 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 860 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 861 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 862 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 863 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 864 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 865 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 866 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 867 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 868 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 869 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 870 871 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 872 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 873 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 874 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 875 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 876 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 877 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 878 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 879 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 880 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 881 ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 882 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 883 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 884 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 885 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 886 ierr = PetscFree(gidx);CHKERRQ(ierr); 887 888 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 889 flg = dm->prealloc_only; 890 dm->prealloc_only = PETSC_TRUE; 891 switch (dim) { 892 case 1: 893 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 894 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 895 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 896 break; 897 case 2: 898 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 899 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 900 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 901 break; 902 case 3: 903 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 904 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 906 break; 907 default: 908 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 909 break; 910 } 911 dm->prealloc_only = flg; 912 PetscFunctionReturn(0); 913 } 914 915 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 916 { 917 PetscErrorCode ierr; 918 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 919 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 920 MPI_Comm comm; 921 PetscScalar *values; 922 DMBoundaryType bx,by; 923 ISLocalToGlobalMapping ltog; 924 DMDAStencilType st; 925 926 PetscFunctionBegin; 927 /* 928 nc - number of components per grid point 929 col - number of colors needed in one direction for single component problem 930 931 */ 932 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 933 col = 2*s + 1; 934 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 935 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 936 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 937 938 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 939 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 940 941 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 942 /* determine the matrix preallocation information */ 943 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 944 for (i=xs; i<xs+nx; i++) { 945 946 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 947 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 948 949 for (j=ys; j<ys+ny; j++) { 950 slot = i - gxs + gnx*(j - gys); 951 952 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 953 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 954 955 cnt = 0; 956 for (k=0; k<nc; k++) { 957 for (l=lstart; l<lend+1; l++) { 958 for (p=pstart; p<pend+1; p++) { 959 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 960 cols[cnt++] = k + nc*(slot + gnx*l + p); 961 } 962 } 963 } 964 rows[k] = k + nc*(slot); 965 } 966 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 967 } 968 } 969 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 970 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 971 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 972 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 973 974 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 975 976 /* 977 For each node in the grid: we get the neighbors in the local (on processor ordering 978 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 979 PETSc ordering. 980 */ 981 if (!da->prealloc_only) { 982 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 983 for (i=xs; i<xs+nx; i++) { 984 985 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 986 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 987 988 for (j=ys; j<ys+ny; j++) { 989 slot = i - gxs + gnx*(j - gys); 990 991 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 992 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 993 994 cnt = 0; 995 for (k=0; k<nc; k++) { 996 for (l=lstart; l<lend+1; l++) { 997 for (p=pstart; p<pend+1; p++) { 998 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 999 cols[cnt++] = k + nc*(slot + gnx*l + p); 1000 } 1001 } 1002 } 1003 rows[k] = k + nc*(slot); 1004 } 1005 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1006 } 1007 } 1008 ierr = PetscFree(values);CHKERRQ(ierr); 1009 /* do not copy values to GPU since they are all zero and not yet needed there */ 1010 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1011 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1012 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1013 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1014 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1015 } 1016 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1021 { 1022 PetscErrorCode ierr; 1023 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1024 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1025 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1026 MPI_Comm comm; 1027 PetscScalar *values; 1028 DMBoundaryType bx,by,bz; 1029 ISLocalToGlobalMapping ltog; 1030 DMDAStencilType st; 1031 1032 PetscFunctionBegin; 1033 /* 1034 nc - number of components per grid point 1035 col - number of colors needed in one direction for single component problem 1036 1037 */ 1038 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1039 col = 2*s + 1; 1040 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1041 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1042 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1043 1044 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1045 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1046 1047 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1048 /* determine the matrix preallocation information */ 1049 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1050 for (i=xs; i<xs+nx; i++) { 1051 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1052 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1053 for (j=ys; j<ys+ny; j++) { 1054 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1055 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1056 for (k=zs; k<zs+nz; k++) { 1057 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1058 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1059 1060 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1061 1062 cnt = 0; 1063 for (l=0; l<nc; l++) { 1064 for (ii=istart; ii<iend+1; ii++) { 1065 for (jj=jstart; jj<jend+1; jj++) { 1066 for (kk=kstart; kk<kend+1; kk++) { 1067 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1068 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1069 } 1070 } 1071 } 1072 } 1073 rows[l] = l + nc*(slot); 1074 } 1075 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1076 } 1077 } 1078 } 1079 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1080 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1081 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1082 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1083 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1084 1085 /* 1086 For each node in the grid: we get the neighbors in the local (on processor ordering 1087 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1088 PETSc ordering. 1089 */ 1090 if (!da->prealloc_only) { 1091 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1092 for (i=xs; i<xs+nx; i++) { 1093 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1094 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1095 for (j=ys; j<ys+ny; j++) { 1096 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1097 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1098 for (k=zs; k<zs+nz; k++) { 1099 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1100 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1101 1102 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1103 1104 cnt = 0; 1105 for (l=0; l<nc; l++) { 1106 for (ii=istart; ii<iend+1; ii++) { 1107 for (jj=jstart; jj<jend+1; jj++) { 1108 for (kk=kstart; kk<kend+1; kk++) { 1109 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1110 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1111 } 1112 } 1113 } 1114 } 1115 rows[l] = l + nc*(slot); 1116 } 1117 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1118 } 1119 } 1120 } 1121 ierr = PetscFree(values);CHKERRQ(ierr); 1122 /* do not copy values to GPU since they are all zero and not yet needed there */ 1123 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1124 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1125 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1126 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1127 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1128 } 1129 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1134 { 1135 PetscErrorCode ierr; 1136 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1137 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1138 MPI_Comm comm; 1139 DMBoundaryType bx,by; 1140 ISLocalToGlobalMapping ltog,mltog; 1141 DMDAStencilType st; 1142 PetscBool removedups = PETSC_FALSE; 1143 1144 PetscFunctionBegin; 1145 /* 1146 nc - number of components per grid point 1147 col - number of colors needed in one direction for single component problem 1148 1149 */ 1150 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1151 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1152 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1153 } 1154 col = 2*s + 1; 1155 /* 1156 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1157 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1158 */ 1159 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1160 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1161 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1162 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1164 1165 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1166 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1167 1168 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1169 /* determine the matrix preallocation information */ 1170 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1171 for (i=xs; i<xs+nx; i++) { 1172 1173 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1174 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1175 1176 for (j=ys; j<ys+ny; j++) { 1177 slot = i - gxs + gnx*(j - gys); 1178 1179 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1180 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1181 1182 cnt = 0; 1183 for (k=0; k<nc; k++) { 1184 for (l=lstart; l<lend+1; l++) { 1185 for (p=pstart; p<pend+1; p++) { 1186 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1187 cols[cnt++] = k + nc*(slot + gnx*l + p); 1188 } 1189 } 1190 } 1191 rows[k] = k + nc*(slot); 1192 } 1193 if (removedups) { 1194 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1195 } else { 1196 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1197 } 1198 } 1199 } 1200 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1201 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1202 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1203 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1204 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1205 if (!mltog) { 1206 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1207 } 1208 1209 /* 1210 For each node in the grid: we get the neighbors in the local (on processor ordering 1211 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1212 PETSc ordering. 1213 */ 1214 if (!da->prealloc_only) { 1215 for (i=xs; i<xs+nx; i++) { 1216 1217 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1218 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1219 1220 for (j=ys; j<ys+ny; j++) { 1221 slot = i - gxs + gnx*(j - gys); 1222 1223 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1224 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1225 1226 cnt = 0; 1227 for (l=lstart; l<lend+1; l++) { 1228 for (p=pstart; p<pend+1; p++) { 1229 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1230 cols[cnt++] = nc*(slot + gnx*l + p); 1231 for (k=1; k<nc; k++) { 1232 cols[cnt] = 1 + cols[cnt-1];cnt++; 1233 } 1234 } 1235 } 1236 } 1237 for (k=0; k<nc; k++) rows[k] = k + nc*(slot); 1238 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1239 } 1240 } 1241 /* do not copy values to GPU since they are all zero and not yet needed there */ 1242 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1243 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1244 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1245 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1246 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1247 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1248 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1249 } 1250 } 1251 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1256 { 1257 PetscErrorCode ierr; 1258 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1259 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1260 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1261 DM_DA *dd = (DM_DA*)da->data; 1262 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1263 MPI_Comm comm; 1264 DMBoundaryType bx,by; 1265 ISLocalToGlobalMapping ltog; 1266 DMDAStencilType st; 1267 PetscBool removedups = PETSC_FALSE; 1268 1269 PetscFunctionBegin; 1270 /* 1271 nc - number of components per grid point 1272 col - number of colors needed in one direction for single component problem 1273 1274 */ 1275 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1276 col = 2*s + 1; 1277 /* 1278 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1279 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1280 */ 1281 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1282 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1283 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1284 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1286 1287 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1288 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1289 1290 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1291 /* determine the matrix preallocation information */ 1292 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1293 for (i=xs; i<xs+nx; i++) { 1294 1295 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1296 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1297 1298 for (j=ys; j<ys+ny; j++) { 1299 slot = i - gxs + gnx*(j - gys); 1300 1301 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1302 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1303 1304 for (k=0; k<nc; k++) { 1305 cnt = 0; 1306 for (l=lstart; l<lend+1; l++) { 1307 for (p=pstart; p<pend+1; p++) { 1308 if (l || p) { 1309 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1310 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1311 } 1312 } else { 1313 if (dfill) { 1314 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1315 } else { 1316 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1317 } 1318 } 1319 } 1320 } 1321 row = k + nc*(slot); 1322 maxcnt = PetscMax(maxcnt,cnt); 1323 if (removedups) { 1324 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1325 } else { 1326 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1327 } 1328 } 1329 } 1330 } 1331 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1332 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1333 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1334 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1335 1336 /* 1337 For each node in the grid: we get the neighbors in the local (on processor ordering 1338 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1339 PETSc ordering. 1340 */ 1341 if (!da->prealloc_only) { 1342 for (i=xs; i<xs+nx; i++) { 1343 1344 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1345 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1346 1347 for (j=ys; j<ys+ny; j++) { 1348 slot = i - gxs + gnx*(j - gys); 1349 1350 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1351 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1352 1353 for (k=0; k<nc; k++) { 1354 cnt = 0; 1355 for (l=lstart; l<lend+1; l++) { 1356 for (p=pstart; p<pend+1; p++) { 1357 if (l || p) { 1358 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1359 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1360 } 1361 } else { 1362 if (dfill) { 1363 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1364 } else { 1365 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1366 } 1367 } 1368 } 1369 } 1370 row = k + nc*(slot); 1371 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1372 } 1373 } 1374 } 1375 /* do not copy values to GPU since they are all zero and not yet needed there */ 1376 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1377 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1378 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1380 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(cols);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /* ---------------------------------------------------------------------------------*/ 1387 1388 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1389 { 1390 PetscErrorCode ierr; 1391 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1392 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1393 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1394 MPI_Comm comm; 1395 DMBoundaryType bx,by,bz; 1396 ISLocalToGlobalMapping ltog,mltog; 1397 DMDAStencilType st; 1398 PetscBool removedups = PETSC_FALSE; 1399 1400 PetscFunctionBegin; 1401 /* 1402 nc - number of components per grid point 1403 col - number of colors needed in one direction for single component problem 1404 1405 */ 1406 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1407 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1408 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1409 } 1410 col = 2*s + 1; 1411 1412 /* 1413 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1414 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1415 */ 1416 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1417 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1418 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1419 1420 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1421 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1422 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1423 1424 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1425 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1426 1427 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1428 /* determine the matrix preallocation information */ 1429 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1430 for (i=xs; i<xs+nx; i++) { 1431 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1432 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1433 for (j=ys; j<ys+ny; j++) { 1434 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1435 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1436 for (k=zs; k<zs+nz; k++) { 1437 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1438 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1439 1440 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1441 1442 cnt = 0; 1443 for (l=0; l<nc; l++) { 1444 for (ii=istart; ii<iend+1; ii++) { 1445 for (jj=jstart; jj<jend+1; jj++) { 1446 for (kk=kstart; kk<kend+1; kk++) { 1447 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1448 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1449 } 1450 } 1451 } 1452 } 1453 rows[l] = l + nc*(slot); 1454 } 1455 if (removedups) { 1456 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1457 } else { 1458 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1459 } 1460 } 1461 } 1462 } 1463 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1464 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1465 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1466 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1467 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1468 if (!mltog) { 1469 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1470 } 1471 1472 /* 1473 For each node in the grid: we get the neighbors in the local (on processor ordering 1474 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1475 PETSc ordering. 1476 */ 1477 if (!da->prealloc_only) { 1478 for (i=xs; i<xs+nx; i++) { 1479 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1480 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1481 for (j=ys; j<ys+ny; j++) { 1482 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1483 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1484 for (k=zs; k<zs+nz; k++) { 1485 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1486 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1487 1488 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1489 1490 cnt = 0; 1491 for (kk=kstart; kk<kend+1; kk++) { 1492 for (jj=jstart; jj<jend+1; jj++) { 1493 for (ii=istart; ii<iend+1; ii++) { 1494 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1495 cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk); 1496 for (l=1; l<nc; l++) { 1497 cols[cnt] = 1 + cols[cnt-1];cnt++; 1498 } 1499 } 1500 } 1501 } 1502 } 1503 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1504 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1505 } 1506 } 1507 } 1508 /* do not copy values to GPU since they are all zero and not yet needed there */ 1509 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1510 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1511 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1512 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1513 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1514 } 1515 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1516 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1517 } 1518 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 /* ---------------------------------------------------------------------------------*/ 1523 1524 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1525 { 1526 PetscErrorCode ierr; 1527 DM_DA *dd = (DM_DA*)da->data; 1528 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1529 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1530 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1531 DMBoundaryType bx; 1532 ISLocalToGlobalMapping ltog; 1533 PetscMPIInt rank,size; 1534 1535 PetscFunctionBegin; 1536 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr); 1537 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr); 1538 1539 /* 1540 nc - number of components per grid point 1541 1542 */ 1543 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1544 if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1545 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1546 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1547 1548 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1549 ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr); 1550 1551 /* 1552 note should be smaller for first and last process with no periodic 1553 does not handle dfill 1554 */ 1555 cnt = 0; 1556 /* coupling with process to the left */ 1557 for (i=0; i<s; i++) { 1558 for (j=0; j<nc; j++) { 1559 ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1560 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1561 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1562 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1563 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1564 } 1565 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1566 cnt++; 1567 } 1568 } 1569 for (i=s; i<nx-s; i++) { 1570 for (j=0; j<nc; j++) { 1571 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1572 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1573 cnt++; 1574 } 1575 } 1576 /* coupling with process to the right */ 1577 for (i=nx-s; i<nx; i++) { 1578 for (j=0; j<nc; j++) { 1579 ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1580 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1581 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1582 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1583 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1584 } 1585 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1586 cnt++; 1587 } 1588 } 1589 1590 ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr); 1591 ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr); 1592 ierr = PetscFree2(cols,ocols);CHKERRQ(ierr); 1593 1594 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1595 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1596 1597 /* 1598 For each node in the grid: we get the neighbors in the local (on processor ordering 1599 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1600 PETSc ordering. 1601 */ 1602 if (!da->prealloc_only) { 1603 ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr); 1604 row = xs*nc; 1605 /* coupling with process to the left */ 1606 for (i=xs; i<xs+s; i++) { 1607 for (j=0; j<nc; j++) { 1608 cnt = 0; 1609 if (rank) { 1610 for (l=0; l<s; l++) { 1611 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1612 } 1613 } 1614 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1615 for (l=0; l<s; l++) { 1616 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1617 } 1618 } 1619 if (dfill) { 1620 for (k=dfill[j]; k<dfill[j+1]; k++) { 1621 cols[cnt++] = i*nc + dfill[k]; 1622 } 1623 } else { 1624 for (k=0; k<nc; k++) { 1625 cols[cnt++] = i*nc + k; 1626 } 1627 } 1628 for (l=0; l<s; l++) { 1629 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1630 } 1631 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1632 row++; 1633 } 1634 } 1635 for (i=xs+s; i<xs+nx-s; i++) { 1636 for (j=0; j<nc; j++) { 1637 cnt = 0; 1638 for (l=0; l<s; l++) { 1639 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1640 } 1641 if (dfill) { 1642 for (k=dfill[j]; k<dfill[j+1]; k++) { 1643 cols[cnt++] = i*nc + dfill[k]; 1644 } 1645 } else { 1646 for (k=0; k<nc; k++) { 1647 cols[cnt++] = i*nc + k; 1648 } 1649 } 1650 for (l=0; l<s; l++) { 1651 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1652 } 1653 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1654 row++; 1655 } 1656 } 1657 /* coupling with process to the right */ 1658 for (i=xs+nx-s; i<xs+nx; i++) { 1659 for (j=0; j<nc; j++) { 1660 cnt = 0; 1661 for (l=0; l<s; l++) { 1662 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1663 } 1664 if (dfill) { 1665 for (k=dfill[j]; k<dfill[j+1]; k++) { 1666 cols[cnt++] = i*nc + dfill[k]; 1667 } 1668 } else { 1669 for (k=0; k<nc; k++) { 1670 cols[cnt++] = i*nc + k; 1671 } 1672 } 1673 if (rank < size-1) { 1674 for (l=0; l<s; l++) { 1675 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1676 } 1677 } 1678 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1679 for (l=0; l<s; l++) { 1680 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1681 } 1682 } 1683 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1684 row++; 1685 } 1686 } 1687 ierr = PetscFree(cols);CHKERRQ(ierr); 1688 /* do not copy values to GPU since they are all zero and not yet needed there */ 1689 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1690 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1692 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1693 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1694 } 1695 PetscFunctionReturn(0); 1696 } 1697 1698 /* ---------------------------------------------------------------------------------*/ 1699 1700 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1701 { 1702 PetscErrorCode ierr; 1703 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1704 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1705 PetscInt istart,iend; 1706 DMBoundaryType bx; 1707 ISLocalToGlobalMapping ltog,mltog; 1708 1709 PetscFunctionBegin; 1710 /* 1711 nc - number of components per grid point 1712 col - number of colors needed in one direction for single component problem 1713 1714 */ 1715 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1716 if (!isIS && bx == DM_BOUNDARY_NONE) { 1717 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1718 } 1719 col = 2*s + 1; 1720 1721 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1722 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1723 1724 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1725 ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr); 1726 ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr); 1727 1728 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1729 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1730 if (!mltog) { 1731 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1732 } 1733 1734 /* 1735 For each node in the grid: we get the neighbors in the local (on processor ordering 1736 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1737 PETSc ordering. 1738 */ 1739 if (!da->prealloc_only) { 1740 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1741 for (i=xs; i<xs+nx; i++) { 1742 istart = PetscMax(-s,gxs - i); 1743 iend = PetscMin(s,gxs + gnx - i - 1); 1744 slot = i - gxs; 1745 1746 cnt = 0; 1747 for (i1=istart; i1<iend+1; i1++) { 1748 cols[cnt++] = nc*(slot + i1); 1749 for (l=1; l<nc; l++) { 1750 cols[cnt] = 1 + cols[cnt-1];cnt++; 1751 } 1752 } 1753 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1754 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1755 } 1756 /* do not copy values to GPU since they are all zero and not yet needed there */ 1757 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1758 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1759 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1760 if (!isIS && bx == DM_BOUNDARY_NONE) { 1761 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1762 } 1763 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1764 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1765 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 /* ---------------------------------------------------------------------------------*/ 1771 1772 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS) 1773 { 1774 PetscErrorCode ierr; 1775 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1776 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1777 PetscInt istart,iend; 1778 DMBoundaryType bx; 1779 ISLocalToGlobalMapping ltog,mltog; 1780 1781 PetscFunctionBegin; 1782 /* 1783 nc - number of components per grid point 1784 col - number of colors needed in one direction for single component problem 1785 */ 1786 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1787 col = 2*s + 1; 1788 1789 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1790 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1791 1792 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1793 ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr); 1794 1795 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1796 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1797 if (!mltog) { 1798 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1799 } 1800 1801 /* 1802 For each node in the grid: we get the neighbors in the local (on processor ordering 1803 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1804 PETSc ordering. 1805 */ 1806 if (!da->prealloc_only) { 1807 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1808 for (i=xs; i<xs+nx; i++) { 1809 istart = PetscMax(-s,gxs - i); 1810 iend = PetscMin(s,gxs + gnx - i - 1); 1811 slot = i - gxs; 1812 1813 cnt = 0; 1814 for (i1=istart; i1<iend+1; i1++) { 1815 cols[cnt++] = nc*(slot + i1); 1816 for (l=1; l<nc; l++) { 1817 cols[cnt] = 1 + cols[cnt-1];cnt++; 1818 } 1819 } 1820 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1821 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1822 } 1823 /* do not copy values to GPU since they are all zero and not yet needed there */ 1824 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1825 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1826 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1827 if (!isIS && bx == DM_BOUNDARY_NONE) { 1828 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1829 } 1830 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1831 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1832 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1833 } 1834 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1839 { 1840 PetscErrorCode ierr; 1841 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1842 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1843 PetscInt istart,iend,jstart,jend,ii,jj; 1844 MPI_Comm comm; 1845 PetscScalar *values; 1846 DMBoundaryType bx,by; 1847 DMDAStencilType st; 1848 ISLocalToGlobalMapping ltog; 1849 1850 PetscFunctionBegin; 1851 /* 1852 nc - number of components per grid point 1853 col - number of colors needed in one direction for single component problem 1854 */ 1855 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1856 col = 2*s + 1; 1857 1858 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1859 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1860 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1861 1862 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 1863 1864 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1865 1866 /* determine the matrix preallocation information */ 1867 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 1868 for (i=xs; i<xs+nx; i++) { 1869 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1870 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1871 for (j=ys; j<ys+ny; j++) { 1872 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1873 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1874 slot = i - gxs + gnx*(j - gys); 1875 1876 /* Find block columns in block row */ 1877 cnt = 0; 1878 for (ii=istart; ii<iend+1; ii++) { 1879 for (jj=jstart; jj<jend+1; jj++) { 1880 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1881 cols[cnt++] = slot + ii + gnx*jj; 1882 } 1883 } 1884 } 1885 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1886 } 1887 } 1888 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1889 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1890 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1891 1892 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1893 1894 /* 1895 For each node in the grid: we get the neighbors in the local (on processor ordering 1896 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1897 PETSc ordering. 1898 */ 1899 if (!da->prealloc_only) { 1900 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1901 for (i=xs; i<xs+nx; i++) { 1902 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1903 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1904 for (j=ys; j<ys+ny; j++) { 1905 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1906 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1907 slot = i - gxs + gnx*(j - gys); 1908 cnt = 0; 1909 for (ii=istart; ii<iend+1; ii++) { 1910 for (jj=jstart; jj<jend+1; jj++) { 1911 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1912 cols[cnt++] = slot + ii + gnx*jj; 1913 } 1914 } 1915 } 1916 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1917 } 1918 } 1919 ierr = PetscFree(values);CHKERRQ(ierr); 1920 /* do not copy values to GPU since they are all zero and not yet needed there */ 1921 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1922 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1923 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1924 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1925 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1926 } 1927 ierr = PetscFree(cols);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1932 { 1933 PetscErrorCode ierr; 1934 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1935 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1936 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1937 MPI_Comm comm; 1938 PetscScalar *values; 1939 DMBoundaryType bx,by,bz; 1940 DMDAStencilType st; 1941 ISLocalToGlobalMapping ltog; 1942 1943 PetscFunctionBegin; 1944 /* 1945 nc - number of components per grid point 1946 col - number of colors needed in one direction for single component problem 1947 1948 */ 1949 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1950 col = 2*s + 1; 1951 1952 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1953 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1954 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1955 1956 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 1957 1958 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1959 1960 /* determine the matrix preallocation information */ 1961 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1962 for (i=xs; i<xs+nx; i++) { 1963 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1964 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1965 for (j=ys; j<ys+ny; j++) { 1966 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1967 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1968 for (k=zs; k<zs+nz; k++) { 1969 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1970 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1971 1972 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1973 1974 /* Find block columns in block row */ 1975 cnt = 0; 1976 for (ii=istart; ii<iend+1; ii++) { 1977 for (jj=jstart; jj<jend+1; jj++) { 1978 for (kk=kstart; kk<kend+1; kk++) { 1979 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1980 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1981 } 1982 } 1983 } 1984 } 1985 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1986 } 1987 } 1988 } 1989 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1990 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1991 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1992 1993 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1994 1995 /* 1996 For each node in the grid: we get the neighbors in the local (on processor ordering 1997 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1998 PETSc ordering. 1999 */ 2000 if (!da->prealloc_only) { 2001 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2002 for (i=xs; i<xs+nx; i++) { 2003 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2004 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2005 for (j=ys; j<ys+ny; j++) { 2006 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2007 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2008 for (k=zs; k<zs+nz; k++) { 2009 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2010 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2011 2012 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2013 2014 cnt = 0; 2015 for (ii=istart; ii<iend+1; ii++) { 2016 for (jj=jstart; jj<jend+1; jj++) { 2017 for (kk=kstart; kk<kend+1; kk++) { 2018 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2019 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2020 } 2021 } 2022 } 2023 } 2024 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2025 } 2026 } 2027 } 2028 ierr = PetscFree(values);CHKERRQ(ierr); 2029 /* do not copy values to GPU since they are all zero and not yet needed there */ 2030 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2031 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2034 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2035 } 2036 ierr = PetscFree(cols);CHKERRQ(ierr); 2037 PetscFunctionReturn(0); 2038 } 2039 2040 /* 2041 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2042 identify in the local ordering with periodic domain. 2043 */ 2044 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2045 { 2046 PetscErrorCode ierr; 2047 PetscInt i,n; 2048 2049 PetscFunctionBegin; 2050 ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr); 2051 ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr); 2052 for (i=0,n=0; i<*cnt; i++) { 2053 if (col[i] >= *row) col[n++] = col[i]; 2054 } 2055 *cnt = n; 2056 PetscFunctionReturn(0); 2057 } 2058 2059 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2060 { 2061 PetscErrorCode ierr; 2062 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2063 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2064 PetscInt istart,iend,jstart,jend,ii,jj; 2065 MPI_Comm comm; 2066 PetscScalar *values; 2067 DMBoundaryType bx,by; 2068 DMDAStencilType st; 2069 ISLocalToGlobalMapping ltog; 2070 2071 PetscFunctionBegin; 2072 /* 2073 nc - number of components per grid point 2074 col - number of colors needed in one direction for single component problem 2075 */ 2076 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 2077 col = 2*s + 1; 2078 2079 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 2080 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 2081 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2082 2083 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 2084 2085 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2086 2087 /* determine the matrix preallocation information */ 2088 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2089 for (i=xs; i<xs+nx; i++) { 2090 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2091 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2092 for (j=ys; j<ys+ny; j++) { 2093 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2094 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2095 slot = i - gxs + gnx*(j - gys); 2096 2097 /* Find block columns in block row */ 2098 cnt = 0; 2099 for (ii=istart; ii<iend+1; ii++) { 2100 for (jj=jstart; jj<jend+1; jj++) { 2101 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2102 cols[cnt++] = slot + ii + gnx*jj; 2103 } 2104 } 2105 } 2106 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2107 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2108 } 2109 } 2110 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2111 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2112 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2113 2114 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2115 2116 /* 2117 For each node in the grid: we get the neighbors in the local (on processor ordering 2118 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2119 PETSc ordering. 2120 */ 2121 if (!da->prealloc_only) { 2122 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 2123 for (i=xs; i<xs+nx; i++) { 2124 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2125 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2126 for (j=ys; j<ys+ny; j++) { 2127 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2128 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2129 slot = i - gxs + gnx*(j - gys); 2130 2131 /* Find block columns in block row */ 2132 cnt = 0; 2133 for (ii=istart; ii<iend+1; ii++) { 2134 for (jj=jstart; jj<jend+1; jj++) { 2135 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2136 cols[cnt++] = slot + ii + gnx*jj; 2137 } 2138 } 2139 } 2140 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2141 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2142 } 2143 } 2144 ierr = PetscFree(values);CHKERRQ(ierr); 2145 /* do not copy values to GPU since they are all zero and not yet needed there */ 2146 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2147 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2148 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2149 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2150 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2151 } 2152 ierr = PetscFree(cols);CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2157 { 2158 PetscErrorCode ierr; 2159 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2160 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2161 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2162 MPI_Comm comm; 2163 PetscScalar *values; 2164 DMBoundaryType bx,by,bz; 2165 DMDAStencilType st; 2166 ISLocalToGlobalMapping ltog; 2167 2168 PetscFunctionBegin; 2169 /* 2170 nc - number of components per grid point 2171 col - number of colors needed in one direction for single component problem 2172 */ 2173 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2174 col = 2*s + 1; 2175 2176 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2177 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2178 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2179 2180 /* create the matrix */ 2181 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 2182 2183 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2184 2185 /* determine the matrix preallocation information */ 2186 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2187 for (i=xs; i<xs+nx; i++) { 2188 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2189 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2190 for (j=ys; j<ys+ny; j++) { 2191 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2192 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2193 for (k=zs; k<zs+nz; k++) { 2194 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2195 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2196 2197 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2198 2199 /* Find block columns in block row */ 2200 cnt = 0; 2201 for (ii=istart; ii<iend+1; ii++) { 2202 for (jj=jstart; jj<jend+1; jj++) { 2203 for (kk=kstart; kk<kend+1; kk++) { 2204 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2205 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2206 } 2207 } 2208 } 2209 } 2210 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2211 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2212 } 2213 } 2214 } 2215 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2216 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2217 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2218 2219 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2220 2221 /* 2222 For each node in the grid: we get the neighbors in the local (on processor ordering 2223 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2224 PETSc ordering. 2225 */ 2226 if (!da->prealloc_only) { 2227 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2228 for (i=xs; i<xs+nx; i++) { 2229 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2230 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2231 for (j=ys; j<ys+ny; j++) { 2232 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2233 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2234 for (k=zs; k<zs+nz; k++) { 2235 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2236 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2237 2238 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2239 2240 cnt = 0; 2241 for (ii=istart; ii<iend+1; ii++) { 2242 for (jj=jstart; jj<jend+1; jj++) { 2243 for (kk=kstart; kk<kend+1; kk++) { 2244 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2245 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2246 } 2247 } 2248 } 2249 } 2250 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2251 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2252 } 2253 } 2254 } 2255 ierr = PetscFree(values);CHKERRQ(ierr); 2256 /* do not copy values to GPU since they are all zero and not yet needed there */ 2257 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2258 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2259 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2260 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2261 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2262 } 2263 ierr = PetscFree(cols);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 /* ---------------------------------------------------------------------------------*/ 2268 2269 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2270 { 2271 PetscErrorCode ierr; 2272 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2273 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2274 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2275 DM_DA *dd = (DM_DA*)da->data; 2276 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2277 MPI_Comm comm; 2278 PetscScalar *values; 2279 DMBoundaryType bx,by,bz; 2280 ISLocalToGlobalMapping ltog; 2281 DMDAStencilType st; 2282 PetscBool removedups = PETSC_FALSE; 2283 2284 PetscFunctionBegin; 2285 /* 2286 nc - number of components per grid point 2287 col - number of colors needed in one direction for single component problem 2288 2289 */ 2290 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2291 col = 2*s + 1; 2292 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2293 by 2*stencil_width + 1\n"); 2294 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2295 by 2*stencil_width + 1\n"); 2296 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2297 by 2*stencil_width + 1\n"); 2298 2299 /* 2300 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2301 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2302 */ 2303 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2304 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2305 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2306 2307 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2308 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2309 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2310 2311 ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr); 2312 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2313 2314 /* determine the matrix preallocation information */ 2315 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2316 2317 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 2318 for (i=xs; i<xs+nx; i++) { 2319 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2320 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2321 for (j=ys; j<ys+ny; j++) { 2322 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2323 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2324 for (k=zs; k<zs+nz; k++) { 2325 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2326 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2327 2328 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2329 2330 for (l=0; l<nc; l++) { 2331 cnt = 0; 2332 for (ii=istart; ii<iend+1; ii++) { 2333 for (jj=jstart; jj<jend+1; jj++) { 2334 for (kk=kstart; kk<kend+1; kk++) { 2335 if (ii || jj || kk) { 2336 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2337 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2338 } 2339 } else { 2340 if (dfill) { 2341 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2342 } else { 2343 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2344 } 2345 } 2346 } 2347 } 2348 } 2349 row = l + nc*(slot); 2350 maxcnt = PetscMax(maxcnt,cnt); 2351 if (removedups) { 2352 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2353 } else { 2354 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2355 } 2356 } 2357 } 2358 } 2359 } 2360 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 2361 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 2362 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2363 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2364 2365 /* 2366 For each node in the grid: we get the neighbors in the local (on processor ordering 2367 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2368 PETSc ordering. 2369 */ 2370 if (!da->prealloc_only) { 2371 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 2372 for (i=xs; i<xs+nx; i++) { 2373 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2374 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2375 for (j=ys; j<ys+ny; j++) { 2376 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2377 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2378 for (k=zs; k<zs+nz; k++) { 2379 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2380 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2381 2382 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2383 2384 for (l=0; l<nc; l++) { 2385 cnt = 0; 2386 for (ii=istart; ii<iend+1; ii++) { 2387 for (jj=jstart; jj<jend+1; jj++) { 2388 for (kk=kstart; kk<kend+1; kk++) { 2389 if (ii || jj || kk) { 2390 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2391 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2392 } 2393 } else { 2394 if (dfill) { 2395 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2396 } else { 2397 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2398 } 2399 } 2400 } 2401 } 2402 } 2403 row = l + nc*(slot); 2404 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2405 } 2406 } 2407 } 2408 } 2409 ierr = PetscFree(values);CHKERRQ(ierr); 2410 /* do not copy values to GPU since they are all zero and not yet needed there */ 2411 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2412 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2413 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2414 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2415 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2416 } 2417 ierr = PetscFree(cols);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420