1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 17 { 18 PetscErrorCode ierr; 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 52 { 53 PetscErrorCode ierr; 54 PetscInt nz; 55 56 PetscFunctionBegin; 57 if (!dfillsparse) PetscFunctionReturn(0); 58 59 /* Determine number of non-zeros */ 60 nz = (dfillsparse[w] - w - 1); 61 62 /* Allocate space for our copy of the given sparse matrix representation. */ 63 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 64 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 69 { 70 PetscErrorCode ierr; 71 PetscInt i,k,cnt = 1; 72 73 PetscFunctionBegin; 74 75 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 76 columns to the left with any nonzeros in them plus 1 */ 77 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 78 for (i=0; i<dd->w; i++) { 79 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 80 } 81 for (i=0; i<dd->w; i++) { 82 if (dd->ofillcols[i]) { 83 dd->ofillcols[i] = cnt++; 84 } 85 } 86 PetscFunctionReturn(0); 87 } 88 89 /*@ 90 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 91 of the matrix returned by DMCreateMatrix(). 92 93 Logically Collective on da 94 95 Input Parameter: 96 + da - the distributed array 97 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 98 - ofill - the fill pattern in the off-diagonal blocks 99 100 Level: developer 101 102 Notes: 103 This only makes sense when you are doing multicomponent problems but using the 104 MPIAIJ matrix format 105 106 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 107 representing coupling and 0 entries for missing coupling. For example 108 $ dfill[9] = {1, 0, 0, 109 $ 1, 1, 0, 110 $ 0, 1, 1} 111 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 112 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 113 diagonal block). 114 115 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 116 can be represented in the dfill, ofill format 117 118 Contributed by Glenn Hammond 119 120 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 121 122 @*/ 123 PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill) 124 { 125 DM_DA *dd = (DM_DA*)da->data; 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 /* save the given dfill and ofill information */ 130 ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr); 131 ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr); 132 133 /* count nonzeros in ofill columns */ 134 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 135 136 PetscFunctionReturn(0); 137 } 138 139 /*@ 140 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 141 of the matrix returned by DMCreateMatrix(), using sparse representations 142 of fill patterns. 143 144 Logically Collective on da 145 146 Input Parameter: 147 + da - the distributed array 148 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 149 - ofill - the sparse fill pattern in the off-diagonal blocks 150 151 Level: developer 152 153 Notes: This only makes sense when you are doing multicomponent problems but using the 154 MPIAIJ matrix format 155 156 The format for dfill and ofill is a sparse representation of a 157 dof-by-dof matrix with 1 entries representing coupling and 0 entries 158 for missing coupling. The sparse representation is a 1 dimensional 159 array of length nz + dof + 1, where nz is the number of non-zeros in 160 the matrix. The first dof entries in the array give the 161 starting array indices of each row's items in the rest of the array, 162 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 163 and the remaining nz items give the column indices of each of 164 the 1s within the logical 2D matrix. Each row's items within 165 the array are the column indices of the 1s within that row 166 of the 2D matrix. PETSc developers may recognize that this is the 167 same format as that computed by the DMDASetBlockFills_Private() 168 function from a dense 2D matrix representation. 169 170 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 171 can be represented in the dfill, ofill format 172 173 Contributed by Philip C. Roth 174 175 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 176 177 @*/ 178 PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse) 179 { 180 DM_DA *dd = (DM_DA*)da->data; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* save the given dfill and ofill information */ 185 ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr); 186 ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr); 187 188 /* count nonzeros in ofill columns */ 189 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 190 191 PetscFunctionReturn(0); 192 } 193 194 PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring) 195 { 196 PetscErrorCode ierr; 197 PetscInt dim,m,n,p,nc; 198 DMBoundaryType bx,by,bz; 199 MPI_Comm comm; 200 PetscMPIInt size; 201 PetscBool isBAIJ; 202 DM_DA *dd = (DM_DA*)da->data; 203 204 PetscFunctionBegin; 205 /* 206 m 207 ------------------------------------------------------ 208 | | 209 | | 210 | ---------------------- | 211 | | | | 212 n | yn | | | 213 | | | | 214 | .--------------------- | 215 | (xs,ys) xn | 216 | . | 217 | (gxs,gys) | 218 | | 219 ----------------------------------------------------- 220 */ 221 222 /* 223 nc - number of components per grid point 224 col - number of colors needed in one direction for single component problem 225 226 */ 227 ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr); 228 229 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 230 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 231 if (ctype == IS_COLORING_LOCAL) { 232 if (size == 1) { 233 ctype = IS_COLORING_GLOBAL; 234 } else if (dim > 1) { 235 if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) { 236 SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 237 } 238 } 239 } 240 241 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 242 matrices is for the blocks, not the individual matrix elements */ 243 ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr); 244 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);} 245 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);} 246 if (isBAIJ) { 247 dd->w = 1; 248 dd->xs = dd->xs/nc; 249 dd->xe = dd->xe/nc; 250 dd->Xs = dd->Xs/nc; 251 dd->Xe = dd->Xe/nc; 252 } 253 254 /* 255 We do not provide a getcoloring function in the DMDA operations because 256 the basic DMDA does not know about matrices. We think of DMDA as being 257 more low-level then matrices. 258 */ 259 if (dim == 1) { 260 ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 261 } else if (dim == 2) { 262 ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 263 } else if (dim == 3) { 264 ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 265 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); 266 if (isBAIJ) { 267 dd->w = nc; 268 dd->xs = dd->xs*nc; 269 dd->xe = dd->xe*nc; 270 dd->Xs = dd->Xs*nc; 271 dd->Xe = dd->Xe*nc; 272 } 273 PetscFunctionReturn(0); 274 } 275 276 /* ---------------------------------------------------------------------------------*/ 277 278 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 279 { 280 PetscErrorCode ierr; 281 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col; 282 PetscInt ncolors; 283 MPI_Comm comm; 284 DMBoundaryType bx,by; 285 DMDAStencilType st; 286 ISColoringValue *colors; 287 DM_DA *dd = (DM_DA*)da->data; 288 289 PetscFunctionBegin; 290 /* 291 nc - number of components per grid point 292 col - number of colors needed in one direction for single component problem 293 294 */ 295 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 296 col = 2*s + 1; 297 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 298 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 299 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 300 301 /* special case as taught to us by Paul Hovland */ 302 if (st == DMDA_STENCIL_STAR && s == 1) { 303 ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 304 } else { 305 if (ctype == IS_COLORING_GLOBAL) { 306 if (!dd->localcoloring) { 307 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 308 ii = 0; 309 for (j=ys; j<ys+ny; j++) { 310 for (i=xs; i<xs+nx; i++) { 311 for (k=0; k<nc; k++) { 312 colors[ii++] = k + nc*((i % col) + col*(j % col)); 313 } 314 } 315 } 316 ncolors = nc + nc*(col-1 + col*(col-1)); 317 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 318 } 319 *coloring = dd->localcoloring; 320 } else if (ctype == IS_COLORING_LOCAL) { 321 if (!dd->ghostedcoloring) { 322 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 323 ii = 0; 324 for (j=gys; j<gys+gny; j++) { 325 for (i=gxs; i<gxs+gnx; i++) { 326 for (k=0; k<nc; k++) { 327 /* the complicated stuff is to handle periodic boundaries */ 328 colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col)); 329 } 330 } 331 } 332 ncolors = nc + nc*(col - 1 + col*(col-1)); 333 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 334 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 335 336 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 337 } 338 *coloring = dd->ghostedcoloring; 339 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 340 } 341 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* ---------------------------------------------------------------------------------*/ 346 347 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 348 { 349 PetscErrorCode ierr; 350 PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P; 351 PetscInt ncolors; 352 MPI_Comm comm; 353 DMBoundaryType bx,by,bz; 354 DMDAStencilType st; 355 ISColoringValue *colors; 356 DM_DA *dd = (DM_DA*)da->data; 357 358 PetscFunctionBegin; 359 /* 360 nc - number of components per grid point 361 col - number of colors needed in one direction for single component problem 362 363 */ 364 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 365 col = 2*s + 1; 366 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 367 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 368 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 369 370 /* create the coloring */ 371 if (ctype == IS_COLORING_GLOBAL) { 372 if (!dd->localcoloring) { 373 ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr); 374 ii = 0; 375 for (k=zs; k<zs+nz; k++) { 376 for (j=ys; j<ys+ny; j++) { 377 for (i=xs; i<xs+nx; i++) { 378 for (l=0; l<nc; l++) { 379 colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col)); 380 } 381 } 382 } 383 } 384 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 385 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 386 } 387 *coloring = dd->localcoloring; 388 } else if (ctype == IS_COLORING_LOCAL) { 389 if (!dd->ghostedcoloring) { 390 ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr); 391 ii = 0; 392 for (k=gzs; k<gzs+gnz; k++) { 393 for (j=gys; j<gys+gny; j++) { 394 for (i=gxs; i<gxs+gnx; i++) { 395 for (l=0; l<nc; l++) { 396 /* the complicated stuff is to handle periodic boundaries */ 397 colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col)); 398 } 399 } 400 } 401 } 402 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 403 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 404 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 405 } 406 *coloring = dd->ghostedcoloring; 407 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 408 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 /* ---------------------------------------------------------------------------------*/ 413 414 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 415 { 416 PetscErrorCode ierr; 417 PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col; 418 PetscInt ncolors; 419 MPI_Comm comm; 420 DMBoundaryType bx; 421 ISColoringValue *colors; 422 DM_DA *dd = (DM_DA*)da->data; 423 424 PetscFunctionBegin; 425 /* 426 nc - number of components per grid point 427 col - number of colors needed in one direction for single component problem 428 429 */ 430 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 431 col = 2*s + 1; 432 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 433 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 434 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 435 436 /* create the coloring */ 437 if (ctype == IS_COLORING_GLOBAL) { 438 if (!dd->localcoloring) { 439 ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr); 440 if (dd->ofillcols) { 441 PetscInt tc = 0; 442 for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0); 443 i1 = 0; 444 for (i=xs; i<xs+nx; i++) { 445 for (l=0; l<nc; l++) { 446 if (dd->ofillcols[l] && (i % col)) { 447 colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; 448 } else { 449 colors[i1++] = l; 450 } 451 } 452 } 453 ncolors = nc + 2*s*tc; 454 } else { 455 i1 = 0; 456 for (i=xs; i<xs+nx; i++) { 457 for (l=0; l<nc; l++) { 458 colors[i1++] = l + nc*(i % col); 459 } 460 } 461 ncolors = nc + nc*(col-1); 462 } 463 ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 464 } 465 *coloring = dd->localcoloring; 466 } else if (ctype == IS_COLORING_LOCAL) { 467 if (!dd->ghostedcoloring) { 468 ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr); 469 i1 = 0; 470 for (i=gxs; i<gxs+gnx; i++) { 471 for (l=0; l<nc; l++) { 472 /* the complicated stuff is to handle periodic boundaries */ 473 colors[i1++] = l + nc*(SetInRange(i,m) % col); 474 } 475 } 476 ncolors = nc + nc*(col-1); 477 ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 478 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 479 } 480 *coloring = dd->ghostedcoloring; 481 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 482 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 487 { 488 PetscErrorCode ierr; 489 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc; 490 PetscInt ncolors; 491 MPI_Comm comm; 492 DMBoundaryType bx,by; 493 ISColoringValue *colors; 494 DM_DA *dd = (DM_DA*)da->data; 495 496 PetscFunctionBegin; 497 /* 498 nc - number of components per grid point 499 col - number of colors needed in one direction for single component problem 500 501 */ 502 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr); 503 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 504 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 505 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 506 /* create the coloring */ 507 if (ctype == IS_COLORING_GLOBAL) { 508 if (!dd->localcoloring) { 509 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 510 ii = 0; 511 for (j=ys; j<ys+ny; j++) { 512 for (i=xs; i<xs+nx; i++) { 513 for (k=0; k<nc; k++) { 514 colors[ii++] = k + nc*((3*j+i) % 5); 515 } 516 } 517 } 518 ncolors = 5*nc; 519 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 520 } 521 *coloring = dd->localcoloring; 522 } else if (ctype == IS_COLORING_LOCAL) { 523 if (!dd->ghostedcoloring) { 524 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 525 ii = 0; 526 for (j=gys; j<gys+gny; j++) { 527 for (i=gxs; i<gxs+gnx; i++) { 528 for (k=0; k<nc; k++) { 529 colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); 530 } 531 } 532 } 533 ncolors = 5*nc; 534 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 535 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 536 } 537 *coloring = dd->ghostedcoloring; 538 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 539 PetscFunctionReturn(0); 540 } 541 542 /* =========================================================================== */ 543 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool); 544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 545 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool); 546 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool); 547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 548 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool); 549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 550 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 551 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 552 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 553 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 555 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 556 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 557 558 /*@C 559 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 560 561 Logically Collective on mat 562 563 Input Parameters: 564 + mat - the matrix 565 - da - the da 566 567 Level: intermediate 568 569 @*/ 570 PetscErrorCode MatSetupDM(Mat mat,DM da) 571 { 572 PetscErrorCode ierr; 573 574 PetscFunctionBegin; 575 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 576 PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA); 577 ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 582 { 583 DM da; 584 PetscErrorCode ierr; 585 const char *prefix; 586 Mat Anatural; 587 AO ao; 588 PetscInt rstart,rend,*petsc,i; 589 IS is; 590 MPI_Comm comm; 591 PetscViewerFormat format; 592 593 PetscFunctionBegin; 594 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 597 598 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 599 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 600 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 601 602 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 603 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 604 ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr); 605 for (i=rstart; i<rend; i++) petsc[i-rstart] = i; 606 ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr); 607 ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 608 609 /* call viewer on natural ordering */ 610 ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr); 611 ierr = ISDestroy(&is);CHKERRQ(ierr); 612 ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr); 613 ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr); 614 ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr); 615 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 616 ierr = MatView(Anatural,viewer);CHKERRQ(ierr); 617 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 618 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 622 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 623 { 624 DM da; 625 PetscErrorCode ierr; 626 Mat Anatural,Aapp; 627 AO ao; 628 PetscInt rstart,rend,*app,i,m,n,M,N; 629 IS is; 630 MPI_Comm comm; 631 632 PetscFunctionBegin; 633 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 634 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 635 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 636 637 /* Load the matrix in natural ordering */ 638 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr); 639 ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr); 640 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 641 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 642 ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr); 643 ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr); 644 645 /* Map natural ordering to application ordering and create IS */ 646 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 647 ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr); 648 ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr); 649 for (i=rstart; i<rend; i++) app[i-rstart] = i; 650 ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr); 651 ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 652 653 /* Do permutation and replace header */ 654 ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr); 655 ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr); 656 ierr = ISDestroy(&is);CHKERRQ(ierr); 657 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 662 { 663 PetscErrorCode ierr; 664 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 665 Mat A; 666 MPI_Comm comm; 667 MatType Atype; 668 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 669 MatType mtype; 670 PetscMPIInt size; 671 DM_DA *dd = (DM_DA*)da->data; 672 673 PetscFunctionBegin; 674 ierr = MatInitializePackage();CHKERRQ(ierr); 675 mtype = da->mattype; 676 677 /* 678 m 679 ------------------------------------------------------ 680 | | 681 | | 682 | ---------------------- | 683 | | | | 684 n | ny | | | 685 | | | | 686 | .--------------------- | 687 | (xs,ys) nx | 688 | . | 689 | (gxs,gys) | 690 | | 691 ----------------------------------------------------- 692 */ 693 694 /* 695 nc - number of components per grid point 696 col - number of colors needed in one direction for single component problem 697 698 */ 699 M = dd->M; 700 N = dd->N; 701 P = dd->P; 702 dim = da->dim; 703 dof = dd->w; 704 /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */ 705 ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr); 706 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 707 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 708 ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr); 709 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 710 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 711 ierr = MatSetDM(A,da);CHKERRQ(ierr); 712 if (da->structure_only) { 713 ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr); 714 } 715 ierr = MatGetType(A,&Atype);CHKERRQ(ierr); 716 /* 717 We do not provide a getmatrix function in the DMDA operations because 718 the basic DMDA does not know about matrices. We think of DMDA as being more 719 more low-level than matrices. This is kind of cheating but, cause sometimes 720 we think of DMDA has higher level than matrices. 721 722 We could switch based on Atype (or mtype), but we do not since the 723 specialized setting routines depend only on the particular preallocation 724 details of the matrix, not the type itself. 725 */ 726 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 727 if (!aij) { 728 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 729 } 730 if (!aij) { 731 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 732 if (!baij) { 733 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 734 } 735 if (!baij) { 736 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 737 if (!sbaij) { 738 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 739 } 740 if (!sbaij) { 741 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr); 742 if (!sell) { 743 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr); 744 } 745 } 746 if (!sell) { 747 ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr); 748 } 749 } 750 } 751 if (aij) { 752 if (dim == 1) { 753 if (dd->ofill) { 754 ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 755 } else { 756 DMBoundaryType bx; 757 PetscMPIInt size; 758 ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 759 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr); 760 if (size == 1 && bx == DM_BOUNDARY_NONE) { 761 ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr); 762 } else { 763 ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 764 } 765 } 766 } else if (dim == 2) { 767 if (dd->ofill) { 768 ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 769 } else { 770 ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 771 } 772 } else if (dim == 3) { 773 if (dd->ofill) { 774 ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 775 } else { 776 ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 777 } 778 } 779 } else if (baij) { 780 if (dim == 2) { 781 ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr); 782 } else if (dim == 3) { 783 ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr); 784 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 785 } else if (sbaij) { 786 if (dim == 2) { 787 ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr); 788 } else if (dim == 3) { 789 ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr); 790 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 791 } else if (sell) { 792 if (dim == 2) { 793 ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr); 794 } else if (dim == 3) { 795 ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr); 796 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 797 } else if (is) { 798 ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr); 799 } else { 800 ISLocalToGlobalMapping ltog; 801 802 ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr); 803 ierr = MatSetUp(A);CHKERRQ(ierr); 804 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 805 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr); 806 } 807 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr); 808 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr); 809 ierr = MatSetDM(A,da);CHKERRQ(ierr); 810 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 811 if (size > 1) { 812 /* change viewer to display matrix in natural ordering */ 813 ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr); 814 ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr); 815 } 816 *J = A; 817 PetscFunctionReturn(0); 818 } 819 820 /* ---------------------------------------------------------------------------------*/ 821 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 822 823 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 824 { 825 DM_DA *da = (DM_DA*)dm->data; 826 Mat lJ; 827 ISLocalToGlobalMapping ltog; 828 IS is_loc_filt, is_glob; 829 const PetscInt *e_loc,*idx; 830 PetscInt nel,nen,nv,dof,dim,*gidx,nb; 831 PetscBool flg; 832 PetscErrorCode ierr; 833 834 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 835 We need to filter the local indices that are represented through the DMDAGetElements decomposition 836 This is because the size of the local matrices in MATIS is the local size of the l2g map */ 837 PetscFunctionBegin; 838 dof = da->w; 839 dim = dm->dim; 840 841 ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr); 842 843 /* get local elements indices in local DMDA numbering */ 844 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 845 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 846 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 847 848 /* obtain a consistent local ordering for MATIS */ 849 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 850 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 851 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 852 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 853 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 854 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 855 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 856 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 857 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 858 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 859 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 860 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 861 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 862 863 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 864 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 865 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 866 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 867 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 868 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 869 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 870 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 871 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 872 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 873 ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 874 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 875 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 876 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 877 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 878 ierr = PetscFree(gidx);CHKERRQ(ierr); 879 880 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 881 flg = dm->prealloc_only; 882 dm->prealloc_only = PETSC_TRUE; 883 switch (dim) { 884 case 1: 885 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 886 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 888 break; 889 case 2: 890 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 891 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 892 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 893 break; 894 case 3: 895 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 896 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 897 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 898 break; 899 default: 900 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 901 } 902 dm->prealloc_only = flg; 903 PetscFunctionReturn(0); 904 } 905 906 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 907 { 908 PetscErrorCode ierr; 909 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 910 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 911 MPI_Comm comm; 912 PetscScalar *values; 913 DMBoundaryType bx,by; 914 ISLocalToGlobalMapping ltog; 915 DMDAStencilType st; 916 917 PetscFunctionBegin; 918 /* 919 nc - number of components per grid point 920 col - number of colors needed in one direction for single component problem 921 922 */ 923 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 924 col = 2*s + 1; 925 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 926 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 927 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 928 929 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 930 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 931 932 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 933 /* determine the matrix preallocation information */ 934 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 935 for (i=xs; i<xs+nx; i++) { 936 937 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 938 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 939 940 for (j=ys; j<ys+ny; j++) { 941 slot = i - gxs + gnx*(j - gys); 942 943 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 944 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 945 946 cnt = 0; 947 for (k=0; k<nc; k++) { 948 for (l=lstart; l<lend+1; l++) { 949 for (p=pstart; p<pend+1; p++) { 950 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 951 cols[cnt++] = k + nc*(slot + gnx*l + p); 952 } 953 } 954 } 955 rows[k] = k + nc*(slot); 956 } 957 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 958 } 959 } 960 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 961 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 962 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 963 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 964 965 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 966 967 /* 968 For each node in the grid: we get the neighbors in the local (on processor ordering 969 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 970 PETSc ordering. 971 */ 972 if (!da->prealloc_only) { 973 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 974 for (i=xs; i<xs+nx; i++) { 975 976 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 977 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 978 979 for (j=ys; j<ys+ny; j++) { 980 slot = i - gxs + gnx*(j - gys); 981 982 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 983 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 984 985 cnt = 0; 986 for (k=0; k<nc; k++) { 987 for (l=lstart; l<lend+1; l++) { 988 for (p=pstart; p<pend+1; p++) { 989 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 990 cols[cnt++] = k + nc*(slot + gnx*l + p); 991 } 992 } 993 } 994 rows[k] = k + nc*(slot); 995 } 996 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 997 } 998 } 999 ierr = PetscFree(values);CHKERRQ(ierr); 1000 /* do not copy values to GPU since they are all zero and not yet needed there */ 1001 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1002 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1005 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1012 { 1013 PetscErrorCode ierr; 1014 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1015 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1016 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1017 MPI_Comm comm; 1018 PetscScalar *values; 1019 DMBoundaryType bx,by,bz; 1020 ISLocalToGlobalMapping ltog; 1021 DMDAStencilType st; 1022 1023 PetscFunctionBegin; 1024 /* 1025 nc - number of components per grid point 1026 col - number of colors needed in one direction for single component problem 1027 1028 */ 1029 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1030 col = 2*s + 1; 1031 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1032 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1033 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1034 1035 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1036 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1037 1038 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1039 /* determine the matrix preallocation information */ 1040 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1041 for (i=xs; i<xs+nx; i++) { 1042 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1043 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1044 for (j=ys; j<ys+ny; j++) { 1045 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1046 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1047 for (k=zs; k<zs+nz; k++) { 1048 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1049 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1050 1051 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1052 1053 cnt = 0; 1054 for (l=0; l<nc; l++) { 1055 for (ii=istart; ii<iend+1; ii++) { 1056 for (jj=jstart; jj<jend+1; jj++) { 1057 for (kk=kstart; kk<kend+1; kk++) { 1058 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1059 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1060 } 1061 } 1062 } 1063 } 1064 rows[l] = l + nc*(slot); 1065 } 1066 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1067 } 1068 } 1069 } 1070 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1071 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1072 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1073 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1074 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1075 1076 /* 1077 For each node in the grid: we get the neighbors in the local (on processor ordering 1078 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1079 PETSc ordering. 1080 */ 1081 if (!da->prealloc_only) { 1082 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1083 for (i=xs; i<xs+nx; i++) { 1084 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1085 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1086 for (j=ys; j<ys+ny; j++) { 1087 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1088 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1089 for (k=zs; k<zs+nz; k++) { 1090 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1091 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1092 1093 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1094 1095 cnt = 0; 1096 for (l=0; l<nc; l++) { 1097 for (ii=istart; ii<iend+1; ii++) { 1098 for (jj=jstart; jj<jend+1; jj++) { 1099 for (kk=kstart; kk<kend+1; kk++) { 1100 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1101 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1102 } 1103 } 1104 } 1105 } 1106 rows[l] = l + nc*(slot); 1107 } 1108 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1109 } 1110 } 1111 } 1112 ierr = PetscFree(values);CHKERRQ(ierr); 1113 /* do not copy values to GPU since they are all zero and not yet needed there */ 1114 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1115 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1116 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1117 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1118 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1119 } 1120 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1125 { 1126 PetscErrorCode ierr; 1127 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1128 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1129 MPI_Comm comm; 1130 DMBoundaryType bx,by; 1131 ISLocalToGlobalMapping ltog,mltog; 1132 DMDAStencilType st; 1133 PetscBool removedups = PETSC_FALSE; 1134 1135 PetscFunctionBegin; 1136 /* 1137 nc - number of components per grid point 1138 col - number of colors needed in one direction for single component problem 1139 1140 */ 1141 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1142 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1143 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1144 } 1145 col = 2*s + 1; 1146 /* 1147 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1148 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1149 */ 1150 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1151 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1152 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1153 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1155 1156 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1157 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1158 1159 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1160 /* determine the matrix preallocation information */ 1161 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1162 for (i=xs; i<xs+nx; i++) { 1163 1164 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1165 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1166 1167 for (j=ys; j<ys+ny; j++) { 1168 slot = i - gxs + gnx*(j - gys); 1169 1170 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1171 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1172 1173 cnt = 0; 1174 for (k=0; k<nc; k++) { 1175 for (l=lstart; l<lend+1; l++) { 1176 for (p=pstart; p<pend+1; p++) { 1177 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1178 cols[cnt++] = k + nc*(slot + gnx*l + p); 1179 } 1180 } 1181 } 1182 rows[k] = k + nc*(slot); 1183 } 1184 if (removedups) { 1185 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1186 } else { 1187 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1188 } 1189 } 1190 } 1191 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1192 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1193 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1194 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1195 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1196 if (!mltog) { 1197 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1198 } 1199 1200 /* 1201 For each node in the grid: we get the neighbors in the local (on processor ordering 1202 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1203 PETSc ordering. 1204 */ 1205 if (!da->prealloc_only) { 1206 for (i=xs; i<xs+nx; i++) { 1207 1208 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1209 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1210 1211 for (j=ys; j<ys+ny; j++) { 1212 slot = i - gxs + gnx*(j - gys); 1213 1214 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1215 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1216 1217 cnt = 0; 1218 for (l=lstart; l<lend+1; l++) { 1219 for (p=pstart; p<pend+1; p++) { 1220 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1221 cols[cnt++] = nc*(slot + gnx*l + p); 1222 for (k=1; k<nc; k++) { 1223 cols[cnt] = 1 + cols[cnt-1];cnt++; 1224 } 1225 } 1226 } 1227 } 1228 for (k=0; k<nc; k++) rows[k] = k + nc*(slot); 1229 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1230 } 1231 } 1232 /* do not copy values to GPU since they are all zero and not yet needed there */ 1233 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1234 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1235 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1236 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1237 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1238 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1239 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1240 } 1241 } 1242 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1247 { 1248 PetscErrorCode ierr; 1249 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1250 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1251 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1252 DM_DA *dd = (DM_DA*)da->data; 1253 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1254 MPI_Comm comm; 1255 DMBoundaryType bx,by; 1256 ISLocalToGlobalMapping ltog; 1257 DMDAStencilType st; 1258 PetscBool removedups = PETSC_FALSE; 1259 1260 PetscFunctionBegin; 1261 /* 1262 nc - number of components per grid point 1263 col - number of colors needed in one direction for single component problem 1264 1265 */ 1266 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1267 col = 2*s + 1; 1268 /* 1269 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1270 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1271 */ 1272 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1273 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1274 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1275 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1277 1278 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1279 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1280 1281 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1282 /* determine the matrix preallocation information */ 1283 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1284 for (i=xs; i<xs+nx; i++) { 1285 1286 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1287 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1288 1289 for (j=ys; j<ys+ny; j++) { 1290 slot = i - gxs + gnx*(j - gys); 1291 1292 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1293 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1294 1295 for (k=0; k<nc; k++) { 1296 cnt = 0; 1297 for (l=lstart; l<lend+1; l++) { 1298 for (p=pstart; p<pend+1; p++) { 1299 if (l || p) { 1300 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1301 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1302 } 1303 } else { 1304 if (dfill) { 1305 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1306 } else { 1307 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1308 } 1309 } 1310 } 1311 } 1312 row = k + nc*(slot); 1313 maxcnt = PetscMax(maxcnt,cnt); 1314 if (removedups) { 1315 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1316 } else { 1317 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1318 } 1319 } 1320 } 1321 } 1322 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1323 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1324 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1325 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1326 1327 /* 1328 For each node in the grid: we get the neighbors in the local (on processor ordering 1329 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1330 PETSc ordering. 1331 */ 1332 if (!da->prealloc_only) { 1333 for (i=xs; i<xs+nx; i++) { 1334 1335 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1336 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1337 1338 for (j=ys; j<ys+ny; j++) { 1339 slot = i - gxs + gnx*(j - gys); 1340 1341 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1342 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1343 1344 for (k=0; k<nc; k++) { 1345 cnt = 0; 1346 for (l=lstart; l<lend+1; l++) { 1347 for (p=pstart; p<pend+1; p++) { 1348 if (l || p) { 1349 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1350 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1351 } 1352 } else { 1353 if (dfill) { 1354 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1355 } else { 1356 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1357 } 1358 } 1359 } 1360 } 1361 row = k + nc*(slot); 1362 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1363 } 1364 } 1365 } 1366 /* do not copy values to GPU since they are all zero and not yet needed there */ 1367 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1368 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1369 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1370 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1371 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1372 } 1373 ierr = PetscFree(cols);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 /* ---------------------------------------------------------------------------------*/ 1378 1379 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1380 { 1381 PetscErrorCode ierr; 1382 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1383 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1384 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1385 MPI_Comm comm; 1386 DMBoundaryType bx,by,bz; 1387 ISLocalToGlobalMapping ltog,mltog; 1388 DMDAStencilType st; 1389 PetscBool removedups = PETSC_FALSE; 1390 1391 PetscFunctionBegin; 1392 /* 1393 nc - number of components per grid point 1394 col - number of colors needed in one direction for single component problem 1395 1396 */ 1397 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1398 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1399 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1400 } 1401 col = 2*s + 1; 1402 1403 /* 1404 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1405 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1406 */ 1407 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1408 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1409 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1410 1411 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1412 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1413 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1414 1415 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1416 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1417 1418 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1419 /* determine the matrix preallocation information */ 1420 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1421 for (i=xs; i<xs+nx; i++) { 1422 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1423 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1424 for (j=ys; j<ys+ny; j++) { 1425 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1426 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1427 for (k=zs; k<zs+nz; k++) { 1428 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1429 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1430 1431 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1432 1433 cnt = 0; 1434 for (l=0; l<nc; l++) { 1435 for (ii=istart; ii<iend+1; ii++) { 1436 for (jj=jstart; jj<jend+1; jj++) { 1437 for (kk=kstart; kk<kend+1; kk++) { 1438 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1439 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1440 } 1441 } 1442 } 1443 } 1444 rows[l] = l + nc*(slot); 1445 } 1446 if (removedups) { 1447 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1448 } else { 1449 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1450 } 1451 } 1452 } 1453 } 1454 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1455 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1456 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1457 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1458 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1459 if (!mltog) { 1460 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1461 } 1462 1463 /* 1464 For each node in the grid: we get the neighbors in the local (on processor ordering 1465 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1466 PETSc ordering. 1467 */ 1468 if (!da->prealloc_only) { 1469 for (i=xs; i<xs+nx; i++) { 1470 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1471 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1472 for (j=ys; j<ys+ny; j++) { 1473 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1474 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1475 for (k=zs; k<zs+nz; k++) { 1476 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1477 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1478 1479 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1480 1481 cnt = 0; 1482 for (kk=kstart; kk<kend+1; kk++) { 1483 for (jj=jstart; jj<jend+1; jj++) { 1484 for (ii=istart; ii<iend+1; ii++) { 1485 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1486 cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk); 1487 for (l=1; l<nc; l++) { 1488 cols[cnt] = 1 + cols[cnt-1];cnt++; 1489 } 1490 } 1491 } 1492 } 1493 } 1494 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1495 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1496 } 1497 } 1498 } 1499 /* do not copy values to GPU since they are all zero and not yet needed there */ 1500 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1501 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1502 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1504 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1505 } 1506 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1507 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1508 } 1509 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 /* ---------------------------------------------------------------------------------*/ 1514 1515 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1516 { 1517 PetscErrorCode ierr; 1518 DM_DA *dd = (DM_DA*)da->data; 1519 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1520 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1521 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1522 DMBoundaryType bx; 1523 ISLocalToGlobalMapping ltog; 1524 PetscMPIInt rank,size; 1525 1526 PetscFunctionBegin; 1527 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr); 1528 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr); 1529 1530 /* 1531 nc - number of components per grid point 1532 1533 */ 1534 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1535 if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1536 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1537 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1538 1539 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1540 ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr); 1541 1542 /* 1543 note should be smaller for first and last process with no periodic 1544 does not handle dfill 1545 */ 1546 cnt = 0; 1547 /* coupling with process to the left */ 1548 for (i=0; i<s; i++) { 1549 for (j=0; j<nc; j++) { 1550 ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1551 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1552 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1553 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1554 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1555 } 1556 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1557 cnt++; 1558 } 1559 } 1560 for (i=s; i<nx-s; i++) { 1561 for (j=0; j<nc; j++) { 1562 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1563 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1564 cnt++; 1565 } 1566 } 1567 /* coupling with process to the right */ 1568 for (i=nx-s; i<nx; i++) { 1569 for (j=0; j<nc; j++) { 1570 ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1571 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1572 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1573 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1574 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1575 } 1576 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1577 cnt++; 1578 } 1579 } 1580 1581 ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr); 1582 ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr); 1583 ierr = PetscFree2(cols,ocols);CHKERRQ(ierr); 1584 1585 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1586 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1587 1588 /* 1589 For each node in the grid: we get the neighbors in the local (on processor ordering 1590 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1591 PETSc ordering. 1592 */ 1593 if (!da->prealloc_only) { 1594 ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr); 1595 row = xs*nc; 1596 /* coupling with process to the left */ 1597 for (i=xs; i<xs+s; i++) { 1598 for (j=0; j<nc; j++) { 1599 cnt = 0; 1600 if (rank) { 1601 for (l=0; l<s; l++) { 1602 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1603 } 1604 } 1605 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1606 for (l=0; l<s; l++) { 1607 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1608 } 1609 } 1610 if (dfill) { 1611 for (k=dfill[j]; k<dfill[j+1]; k++) { 1612 cols[cnt++] = i*nc + dfill[k]; 1613 } 1614 } else { 1615 for (k=0; k<nc; k++) { 1616 cols[cnt++] = i*nc + k; 1617 } 1618 } 1619 for (l=0; l<s; l++) { 1620 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1621 } 1622 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1623 row++; 1624 } 1625 } 1626 for (i=xs+s; i<xs+nx-s; i++) { 1627 for (j=0; j<nc; j++) { 1628 cnt = 0; 1629 for (l=0; l<s; l++) { 1630 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1631 } 1632 if (dfill) { 1633 for (k=dfill[j]; k<dfill[j+1]; k++) { 1634 cols[cnt++] = i*nc + dfill[k]; 1635 } 1636 } else { 1637 for (k=0; k<nc; k++) { 1638 cols[cnt++] = i*nc + k; 1639 } 1640 } 1641 for (l=0; l<s; l++) { 1642 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1643 } 1644 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1645 row++; 1646 } 1647 } 1648 /* coupling with process to the right */ 1649 for (i=xs+nx-s; i<xs+nx; i++) { 1650 for (j=0; j<nc; j++) { 1651 cnt = 0; 1652 for (l=0; l<s; l++) { 1653 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1654 } 1655 if (dfill) { 1656 for (k=dfill[j]; k<dfill[j+1]; k++) { 1657 cols[cnt++] = i*nc + dfill[k]; 1658 } 1659 } else { 1660 for (k=0; k<nc; k++) { 1661 cols[cnt++] = i*nc + k; 1662 } 1663 } 1664 if (rank < size-1) { 1665 for (l=0; l<s; l++) { 1666 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1667 } 1668 } 1669 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1670 for (l=0; l<s; l++) { 1671 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1672 } 1673 } 1674 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1675 row++; 1676 } 1677 } 1678 ierr = PetscFree(cols);CHKERRQ(ierr); 1679 /* do not copy values to GPU since they are all zero and not yet needed there */ 1680 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1681 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1683 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1684 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 /* ---------------------------------------------------------------------------------*/ 1690 1691 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1692 { 1693 PetscErrorCode ierr; 1694 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1695 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1696 PetscInt istart,iend; 1697 DMBoundaryType bx; 1698 ISLocalToGlobalMapping ltog,mltog; 1699 1700 PetscFunctionBegin; 1701 /* 1702 nc - number of components per grid point 1703 col - number of colors needed in one direction for single component problem 1704 1705 */ 1706 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1707 if (!isIS && bx == DM_BOUNDARY_NONE) { 1708 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1709 } 1710 col = 2*s + 1; 1711 1712 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1713 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1714 1715 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1716 ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr); 1717 ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr); 1718 1719 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1720 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1721 if (!mltog) { 1722 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1723 } 1724 1725 /* 1726 For each node in the grid: we get the neighbors in the local (on processor ordering 1727 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1728 PETSc ordering. 1729 */ 1730 if (!da->prealloc_only) { 1731 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1732 for (i=xs; i<xs+nx; i++) { 1733 istart = PetscMax(-s,gxs - i); 1734 iend = PetscMin(s,gxs + gnx - i - 1); 1735 slot = i - gxs; 1736 1737 cnt = 0; 1738 for (i1=istart; i1<iend+1; i1++) { 1739 cols[cnt++] = nc*(slot + i1); 1740 for (l=1; l<nc; l++) { 1741 cols[cnt] = 1 + cols[cnt-1];cnt++; 1742 } 1743 } 1744 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1745 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1746 } 1747 /* do not copy values to GPU since they are all zero and not yet needed there */ 1748 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1749 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1750 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1751 if (!isIS && bx == DM_BOUNDARY_NONE) { 1752 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1753 } 1754 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1755 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1756 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 /* ---------------------------------------------------------------------------------*/ 1762 1763 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS) 1764 { 1765 PetscErrorCode ierr; 1766 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1767 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1768 PetscInt istart,iend; 1769 DMBoundaryType bx; 1770 ISLocalToGlobalMapping ltog,mltog; 1771 1772 PetscFunctionBegin; 1773 /* 1774 nc - number of components per grid point 1775 col - number of colors needed in one direction for single component problem 1776 */ 1777 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1778 col = 2*s + 1; 1779 1780 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1781 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1782 1783 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1784 ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr); 1785 1786 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1787 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1788 if (!mltog) { 1789 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1790 } 1791 1792 /* 1793 For each node in the grid: we get the neighbors in the local (on processor ordering 1794 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1795 PETSc ordering. 1796 */ 1797 if (!da->prealloc_only) { 1798 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1799 for (i=xs; i<xs+nx; i++) { 1800 istart = PetscMax(-s,gxs - i); 1801 iend = PetscMin(s,gxs + gnx - i - 1); 1802 slot = i - gxs; 1803 1804 cnt = 0; 1805 for (i1=istart; i1<iend+1; i1++) { 1806 cols[cnt++] = nc*(slot + i1); 1807 for (l=1; l<nc; l++) { 1808 cols[cnt] = 1 + cols[cnt-1];cnt++; 1809 } 1810 } 1811 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1812 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1813 } 1814 /* do not copy values to GPU since they are all zero and not yet needed there */ 1815 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1816 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1817 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1818 if (!isIS && bx == DM_BOUNDARY_NONE) { 1819 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1820 } 1821 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1822 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1823 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1824 } 1825 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1830 { 1831 PetscErrorCode ierr; 1832 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1833 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1834 PetscInt istart,iend,jstart,jend,ii,jj; 1835 MPI_Comm comm; 1836 PetscScalar *values; 1837 DMBoundaryType bx,by; 1838 DMDAStencilType st; 1839 ISLocalToGlobalMapping ltog; 1840 1841 PetscFunctionBegin; 1842 /* 1843 nc - number of components per grid point 1844 col - number of colors needed in one direction for single component problem 1845 */ 1846 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1847 col = 2*s + 1; 1848 1849 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1850 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1851 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1852 1853 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 1854 1855 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1856 1857 /* determine the matrix preallocation information */ 1858 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 1859 for (i=xs; i<xs+nx; i++) { 1860 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1861 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1862 for (j=ys; j<ys+ny; j++) { 1863 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1864 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1865 slot = i - gxs + gnx*(j - gys); 1866 1867 /* Find block columns in block row */ 1868 cnt = 0; 1869 for (ii=istart; ii<iend+1; ii++) { 1870 for (jj=jstart; jj<jend+1; jj++) { 1871 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1872 cols[cnt++] = slot + ii + gnx*jj; 1873 } 1874 } 1875 } 1876 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1877 } 1878 } 1879 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1880 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1881 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1882 1883 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1884 1885 /* 1886 For each node in the grid: we get the neighbors in the local (on processor ordering 1887 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1888 PETSc ordering. 1889 */ 1890 if (!da->prealloc_only) { 1891 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1892 for (i=xs; i<xs+nx; i++) { 1893 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1894 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1895 for (j=ys; j<ys+ny; j++) { 1896 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1897 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1898 slot = i - gxs + gnx*(j - gys); 1899 cnt = 0; 1900 for (ii=istart; ii<iend+1; ii++) { 1901 for (jj=jstart; jj<jend+1; jj++) { 1902 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1903 cols[cnt++] = slot + ii + gnx*jj; 1904 } 1905 } 1906 } 1907 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1908 } 1909 } 1910 ierr = PetscFree(values);CHKERRQ(ierr); 1911 /* do not copy values to GPU since they are all zero and not yet needed there */ 1912 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1913 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1914 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1915 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1916 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1917 } 1918 ierr = PetscFree(cols);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1923 { 1924 PetscErrorCode ierr; 1925 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1926 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1927 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1928 MPI_Comm comm; 1929 PetscScalar *values; 1930 DMBoundaryType bx,by,bz; 1931 DMDAStencilType st; 1932 ISLocalToGlobalMapping ltog; 1933 1934 PetscFunctionBegin; 1935 /* 1936 nc - number of components per grid point 1937 col - number of colors needed in one direction for single component problem 1938 1939 */ 1940 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1941 col = 2*s + 1; 1942 1943 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1944 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1945 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1946 1947 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 1948 1949 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1950 1951 /* determine the matrix preallocation information */ 1952 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1953 for (i=xs; i<xs+nx; i++) { 1954 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1955 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1956 for (j=ys; j<ys+ny; j++) { 1957 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1958 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1959 for (k=zs; k<zs+nz; k++) { 1960 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1961 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1962 1963 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1964 1965 /* Find block columns in block row */ 1966 cnt = 0; 1967 for (ii=istart; ii<iend+1; ii++) { 1968 for (jj=jstart; jj<jend+1; jj++) { 1969 for (kk=kstart; kk<kend+1; kk++) { 1970 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1971 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1972 } 1973 } 1974 } 1975 } 1976 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1977 } 1978 } 1979 } 1980 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1981 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1982 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1983 1984 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1985 1986 /* 1987 For each node in the grid: we get the neighbors in the local (on processor ordering 1988 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1989 PETSc ordering. 1990 */ 1991 if (!da->prealloc_only) { 1992 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 1993 for (i=xs; i<xs+nx; i++) { 1994 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1995 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1996 for (j=ys; j<ys+ny; j++) { 1997 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1998 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1999 for (k=zs; k<zs+nz; k++) { 2000 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2001 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2002 2003 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2004 2005 cnt = 0; 2006 for (ii=istart; ii<iend+1; ii++) { 2007 for (jj=jstart; jj<jend+1; jj++) { 2008 for (kk=kstart; kk<kend+1; kk++) { 2009 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2010 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2011 } 2012 } 2013 } 2014 } 2015 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2016 } 2017 } 2018 } 2019 ierr = PetscFree(values);CHKERRQ(ierr); 2020 /* do not copy values to GPU since they are all zero and not yet needed there */ 2021 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2022 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2024 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2025 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2026 } 2027 ierr = PetscFree(cols);CHKERRQ(ierr); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 /* 2032 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2033 identify in the local ordering with periodic domain. 2034 */ 2035 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2036 { 2037 PetscErrorCode ierr; 2038 PetscInt i,n; 2039 2040 PetscFunctionBegin; 2041 ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr); 2042 ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr); 2043 for (i=0,n=0; i<*cnt; i++) { 2044 if (col[i] >= *row) col[n++] = col[i]; 2045 } 2046 *cnt = n; 2047 PetscFunctionReturn(0); 2048 } 2049 2050 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2051 { 2052 PetscErrorCode ierr; 2053 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2054 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2055 PetscInt istart,iend,jstart,jend,ii,jj; 2056 MPI_Comm comm; 2057 PetscScalar *values; 2058 DMBoundaryType bx,by; 2059 DMDAStencilType st; 2060 ISLocalToGlobalMapping ltog; 2061 2062 PetscFunctionBegin; 2063 /* 2064 nc - number of components per grid point 2065 col - number of colors needed in one direction for single component problem 2066 */ 2067 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 2068 col = 2*s + 1; 2069 2070 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 2071 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 2072 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2073 2074 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 2075 2076 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2077 2078 /* determine the matrix preallocation information */ 2079 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2080 for (i=xs; i<xs+nx; i++) { 2081 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2082 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2083 for (j=ys; j<ys+ny; j++) { 2084 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2085 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2086 slot = i - gxs + gnx*(j - gys); 2087 2088 /* Find block columns in block row */ 2089 cnt = 0; 2090 for (ii=istart; ii<iend+1; ii++) { 2091 for (jj=jstart; jj<jend+1; jj++) { 2092 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2093 cols[cnt++] = slot + ii + gnx*jj; 2094 } 2095 } 2096 } 2097 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2098 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2099 } 2100 } 2101 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2102 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2103 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2104 2105 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2106 2107 /* 2108 For each node in the grid: we get the neighbors in the local (on processor ordering 2109 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2110 PETSc ordering. 2111 */ 2112 if (!da->prealloc_only) { 2113 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 2114 for (i=xs; i<xs+nx; i++) { 2115 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2116 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2117 for (j=ys; j<ys+ny; j++) { 2118 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2119 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2120 slot = i - gxs + gnx*(j - gys); 2121 2122 /* Find block columns in block row */ 2123 cnt = 0; 2124 for (ii=istart; ii<iend+1; ii++) { 2125 for (jj=jstart; jj<jend+1; jj++) { 2126 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2127 cols[cnt++] = slot + ii + gnx*jj; 2128 } 2129 } 2130 } 2131 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2132 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2133 } 2134 } 2135 ierr = PetscFree(values);CHKERRQ(ierr); 2136 /* do not copy values to GPU since they are all zero and not yet needed there */ 2137 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2138 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2139 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2140 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2141 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2142 } 2143 ierr = PetscFree(cols);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2148 { 2149 PetscErrorCode ierr; 2150 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2151 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2152 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2153 MPI_Comm comm; 2154 PetscScalar *values; 2155 DMBoundaryType bx,by,bz; 2156 DMDAStencilType st; 2157 ISLocalToGlobalMapping ltog; 2158 2159 PetscFunctionBegin; 2160 /* 2161 nc - number of components per grid point 2162 col - number of colors needed in one direction for single component problem 2163 */ 2164 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2165 col = 2*s + 1; 2166 2167 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2168 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2169 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2170 2171 /* create the matrix */ 2172 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 2173 2174 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2175 2176 /* determine the matrix preallocation information */ 2177 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2178 for (i=xs; i<xs+nx; i++) { 2179 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2180 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2181 for (j=ys; j<ys+ny; j++) { 2182 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2183 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2184 for (k=zs; k<zs+nz; k++) { 2185 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2186 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2187 2188 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2189 2190 /* Find block columns in block row */ 2191 cnt = 0; 2192 for (ii=istart; ii<iend+1; ii++) { 2193 for (jj=jstart; jj<jend+1; jj++) { 2194 for (kk=kstart; kk<kend+1; kk++) { 2195 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2196 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2197 } 2198 } 2199 } 2200 } 2201 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2202 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2203 } 2204 } 2205 } 2206 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2207 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2208 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2209 2210 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2211 2212 /* 2213 For each node in the grid: we get the neighbors in the local (on processor ordering 2214 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2215 PETSc ordering. 2216 */ 2217 if (!da->prealloc_only) { 2218 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2219 for (i=xs; i<xs+nx; i++) { 2220 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2221 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2222 for (j=ys; j<ys+ny; j++) { 2223 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2224 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2225 for (k=zs; k<zs+nz; k++) { 2226 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2227 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2228 2229 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2230 2231 cnt = 0; 2232 for (ii=istart; ii<iend+1; ii++) { 2233 for (jj=jstart; jj<jend+1; jj++) { 2234 for (kk=kstart; kk<kend+1; kk++) { 2235 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2236 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2237 } 2238 } 2239 } 2240 } 2241 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2242 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2243 } 2244 } 2245 } 2246 ierr = PetscFree(values);CHKERRQ(ierr); 2247 /* do not copy values to GPU since they are all zero and not yet needed there */ 2248 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2249 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2250 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2252 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2253 } 2254 ierr = PetscFree(cols);CHKERRQ(ierr); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 /* ---------------------------------------------------------------------------------*/ 2259 2260 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2261 { 2262 PetscErrorCode ierr; 2263 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2264 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2265 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2266 DM_DA *dd = (DM_DA*)da->data; 2267 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2268 MPI_Comm comm; 2269 PetscScalar *values; 2270 DMBoundaryType bx,by,bz; 2271 ISLocalToGlobalMapping ltog; 2272 DMDAStencilType st; 2273 PetscBool removedups = PETSC_FALSE; 2274 2275 PetscFunctionBegin; 2276 /* 2277 nc - number of components per grid point 2278 col - number of colors needed in one direction for single component problem 2279 2280 */ 2281 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2282 col = 2*s + 1; 2283 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2284 by 2*stencil_width + 1\n"); 2285 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2286 by 2*stencil_width + 1\n"); 2287 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2288 by 2*stencil_width + 1\n"); 2289 2290 /* 2291 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2292 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2293 */ 2294 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2295 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2296 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2297 2298 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2299 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2300 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2301 2302 ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr); 2303 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2304 2305 /* determine the matrix preallocation information */ 2306 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2307 2308 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 2309 for (i=xs; i<xs+nx; i++) { 2310 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2311 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2312 for (j=ys; j<ys+ny; j++) { 2313 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2314 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2315 for (k=zs; k<zs+nz; k++) { 2316 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2317 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2318 2319 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2320 2321 for (l=0; l<nc; l++) { 2322 cnt = 0; 2323 for (ii=istart; ii<iend+1; ii++) { 2324 for (jj=jstart; jj<jend+1; jj++) { 2325 for (kk=kstart; kk<kend+1; kk++) { 2326 if (ii || jj || kk) { 2327 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2328 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2329 } 2330 } else { 2331 if (dfill) { 2332 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2333 } else { 2334 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2335 } 2336 } 2337 } 2338 } 2339 } 2340 row = l + nc*(slot); 2341 maxcnt = PetscMax(maxcnt,cnt); 2342 if (removedups) { 2343 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2344 } else { 2345 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2346 } 2347 } 2348 } 2349 } 2350 } 2351 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 2352 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 2353 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2354 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2355 2356 /* 2357 For each node in the grid: we get the neighbors in the local (on processor ordering 2358 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2359 PETSc ordering. 2360 */ 2361 if (!da->prealloc_only) { 2362 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 2363 for (i=xs; i<xs+nx; i++) { 2364 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2365 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2366 for (j=ys; j<ys+ny; j++) { 2367 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2368 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2369 for (k=zs; k<zs+nz; k++) { 2370 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2371 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2372 2373 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2374 2375 for (l=0; l<nc; l++) { 2376 cnt = 0; 2377 for (ii=istart; ii<iend+1; ii++) { 2378 for (jj=jstart; jj<jend+1; jj++) { 2379 for (kk=kstart; kk<kend+1; kk++) { 2380 if (ii || jj || kk) { 2381 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2382 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2383 } 2384 } else { 2385 if (dfill) { 2386 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2387 } else { 2388 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2389 } 2390 } 2391 } 2392 } 2393 } 2394 row = l + nc*(slot); 2395 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2396 } 2397 } 2398 } 2399 } 2400 ierr = PetscFree(values);CHKERRQ(ierr); 2401 /* do not copy values to GPU since they are all zero and not yet needed there */ 2402 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2403 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2404 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2405 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2406 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2407 } 2408 ierr = PetscFree(cols);CHKERRQ(ierr); 2409 PetscFunctionReturn(0); 2410 } 2411