1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 17 { 18 PetscErrorCode ierr; 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 52 { 53 PetscErrorCode ierr; 54 PetscInt nz; 55 56 PetscFunctionBegin; 57 if (!dfillsparse) PetscFunctionReturn(0); 58 59 /* Determine number of non-zeros */ 60 nz = (dfillsparse[w] - w - 1); 61 62 /* Allocate space for our copy of the given sparse matrix representation. */ 63 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 64 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 69 { 70 PetscErrorCode ierr; 71 PetscInt i,k,cnt = 1; 72 73 PetscFunctionBegin; 74 75 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 76 columns to the left with any nonzeros in them plus 1 */ 77 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 78 for (i=0; i<dd->w; i++) { 79 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 80 } 81 for (i=0; i<dd->w; i++) { 82 if (dd->ofillcols[i]) { 83 dd->ofillcols[i] = cnt++; 84 } 85 } 86 PetscFunctionReturn(0); 87 } 88 89 /*@ 90 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 91 of the matrix returned by DMCreateMatrix(). 92 93 Logically Collective on da 94 95 Input Parameters: 96 + da - the distributed array 97 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 98 - ofill - the fill pattern in the off-diagonal blocks 99 100 Level: developer 101 102 Notes: 103 This only makes sense when you are doing multicomponent problems but using the 104 MPIAIJ matrix format 105 106 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 107 representing coupling and 0 entries for missing coupling. For example 108 $ dfill[9] = {1, 0, 0, 109 $ 1, 1, 0, 110 $ 0, 1, 1} 111 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 112 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 113 diagonal block). 114 115 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 116 can be represented in the dfill, ofill format 117 118 Contributed by Glenn Hammond 119 120 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 121 122 @*/ 123 PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill) 124 { 125 DM_DA *dd = (DM_DA*)da->data; 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 /* save the given dfill and ofill information */ 130 ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr); 131 ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr); 132 133 /* count nonzeros in ofill columns */ 134 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 135 136 PetscFunctionReturn(0); 137 } 138 139 /*@ 140 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 141 of the matrix returned by DMCreateMatrix(), using sparse representations 142 of fill patterns. 143 144 Logically Collective on da 145 146 Input Parameters: 147 + da - the distributed array 148 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 149 - ofill - the sparse fill pattern in the off-diagonal blocks 150 151 Level: developer 152 153 Notes: This only makes sense when you are doing multicomponent problems but using the 154 MPIAIJ matrix format 155 156 The format for dfill and ofill is a sparse representation of a 157 dof-by-dof matrix with 1 entries representing coupling and 0 entries 158 for missing coupling. The sparse representation is a 1 dimensional 159 array of length nz + dof + 1, where nz is the number of non-zeros in 160 the matrix. The first dof entries in the array give the 161 starting array indices of each row's items in the rest of the array, 162 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 163 and the remaining nz items give the column indices of each of 164 the 1s within the logical 2D matrix. Each row's items within 165 the array are the column indices of the 1s within that row 166 of the 2D matrix. PETSc developers may recognize that this is the 167 same format as that computed by the DMDASetBlockFills_Private() 168 function from a dense 2D matrix representation. 169 170 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 171 can be represented in the dfill, ofill format 172 173 Contributed by Philip C. Roth 174 175 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 176 177 @*/ 178 PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse) 179 { 180 DM_DA *dd = (DM_DA*)da->data; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* save the given dfill and ofill information */ 185 ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr); 186 ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr); 187 188 /* count nonzeros in ofill columns */ 189 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 190 191 PetscFunctionReturn(0); 192 } 193 194 PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring) 195 { 196 PetscErrorCode ierr; 197 PetscInt dim,m,n,p,nc; 198 DMBoundaryType bx,by,bz; 199 MPI_Comm comm; 200 PetscMPIInt size; 201 PetscBool isBAIJ; 202 DM_DA *dd = (DM_DA*)da->data; 203 204 PetscFunctionBegin; 205 /* 206 m 207 ------------------------------------------------------ 208 | | 209 | | 210 | ---------------------- | 211 | | | | 212 n | yn | | | 213 | | | | 214 | .--------------------- | 215 | (xs,ys) xn | 216 | . | 217 | (gxs,gys) | 218 | | 219 ----------------------------------------------------- 220 */ 221 222 /* 223 nc - number of components per grid point 224 col - number of colors needed in one direction for single component problem 225 226 */ 227 ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr); 228 229 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 230 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 231 if (ctype == IS_COLORING_LOCAL) { 232 if (size == 1) { 233 ctype = IS_COLORING_GLOBAL; 234 } else if (dim > 1) { 235 if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) { 236 SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 237 } 238 } 239 } 240 241 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 242 matrices is for the blocks, not the individual matrix elements */ 243 ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr); 244 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);} 245 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);} 246 if (isBAIJ) { 247 dd->w = 1; 248 dd->xs = dd->xs/nc; 249 dd->xe = dd->xe/nc; 250 dd->Xs = dd->Xs/nc; 251 dd->Xe = dd->Xe/nc; 252 } 253 254 /* 255 We do not provide a getcoloring function in the DMDA operations because 256 the basic DMDA does not know about matrices. We think of DMDA as being 257 more low-level then matrices. 258 */ 259 if (dim == 1) { 260 ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 261 } else if (dim == 2) { 262 ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 263 } else if (dim == 3) { 264 ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 265 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); 266 if (isBAIJ) { 267 dd->w = nc; 268 dd->xs = dd->xs*nc; 269 dd->xe = dd->xe*nc; 270 dd->Xs = dd->Xs*nc; 271 dd->Xe = dd->Xe*nc; 272 } 273 PetscFunctionReturn(0); 274 } 275 276 /* ---------------------------------------------------------------------------------*/ 277 278 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 279 { 280 PetscErrorCode ierr; 281 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col; 282 PetscInt ncolors; 283 MPI_Comm comm; 284 DMBoundaryType bx,by; 285 DMDAStencilType st; 286 ISColoringValue *colors; 287 DM_DA *dd = (DM_DA*)da->data; 288 289 PetscFunctionBegin; 290 /* 291 nc - number of components per grid point 292 col - number of colors needed in one direction for single component problem 293 294 */ 295 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 296 col = 2*s + 1; 297 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 298 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 299 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 300 301 /* special case as taught to us by Paul Hovland */ 302 if (st == DMDA_STENCIL_STAR && s == 1) { 303 ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 304 } else { 305 if (ctype == IS_COLORING_GLOBAL) { 306 if (!dd->localcoloring) { 307 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 308 ii = 0; 309 for (j=ys; j<ys+ny; j++) { 310 for (i=xs; i<xs+nx; i++) { 311 for (k=0; k<nc; k++) { 312 colors[ii++] = k + nc*((i % col) + col*(j % col)); 313 } 314 } 315 } 316 ncolors = nc + nc*(col-1 + col*(col-1)); 317 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 318 } 319 *coloring = dd->localcoloring; 320 } else if (ctype == IS_COLORING_LOCAL) { 321 if (!dd->ghostedcoloring) { 322 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 323 ii = 0; 324 for (j=gys; j<gys+gny; j++) { 325 for (i=gxs; i<gxs+gnx; i++) { 326 for (k=0; k<nc; k++) { 327 /* the complicated stuff is to handle periodic boundaries */ 328 colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col)); 329 } 330 } 331 } 332 ncolors = nc + nc*(col - 1 + col*(col-1)); 333 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 334 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 335 336 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 337 } 338 *coloring = dd->ghostedcoloring; 339 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 340 } 341 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* ---------------------------------------------------------------------------------*/ 346 347 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 348 { 349 PetscErrorCode ierr; 350 PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P; 351 PetscInt ncolors; 352 MPI_Comm comm; 353 DMBoundaryType bx,by,bz; 354 DMDAStencilType st; 355 ISColoringValue *colors; 356 DM_DA *dd = (DM_DA*)da->data; 357 358 PetscFunctionBegin; 359 /* 360 nc - number of components per grid point 361 col - number of colors needed in one direction for single component problem 362 363 */ 364 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 365 col = 2*s + 1; 366 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 367 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 368 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 369 370 /* create the coloring */ 371 if (ctype == IS_COLORING_GLOBAL) { 372 if (!dd->localcoloring) { 373 ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr); 374 ii = 0; 375 for (k=zs; k<zs+nz; k++) { 376 for (j=ys; j<ys+ny; j++) { 377 for (i=xs; i<xs+nx; i++) { 378 for (l=0; l<nc; l++) { 379 colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col)); 380 } 381 } 382 } 383 } 384 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 385 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 386 } 387 *coloring = dd->localcoloring; 388 } else if (ctype == IS_COLORING_LOCAL) { 389 if (!dd->ghostedcoloring) { 390 ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr); 391 ii = 0; 392 for (k=gzs; k<gzs+gnz; k++) { 393 for (j=gys; j<gys+gny; j++) { 394 for (i=gxs; i<gxs+gnx; i++) { 395 for (l=0; l<nc; l++) { 396 /* the complicated stuff is to handle periodic boundaries */ 397 colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col)); 398 } 399 } 400 } 401 } 402 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 403 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 404 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 405 } 406 *coloring = dd->ghostedcoloring; 407 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 408 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 /* ---------------------------------------------------------------------------------*/ 413 414 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 415 { 416 PetscErrorCode ierr; 417 PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col; 418 PetscInt ncolors; 419 MPI_Comm comm; 420 DMBoundaryType bx; 421 ISColoringValue *colors; 422 DM_DA *dd = (DM_DA*)da->data; 423 424 PetscFunctionBegin; 425 /* 426 nc - number of components per grid point 427 col - number of colors needed in one direction for single component problem 428 429 */ 430 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 431 col = 2*s + 1; 432 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 433 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 434 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 435 436 /* create the coloring */ 437 if (ctype == IS_COLORING_GLOBAL) { 438 if (!dd->localcoloring) { 439 ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr); 440 if (dd->ofillcols) { 441 PetscInt tc = 0; 442 for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0); 443 i1 = 0; 444 for (i=xs; i<xs+nx; i++) { 445 for (l=0; l<nc; l++) { 446 if (dd->ofillcols[l] && (i % col)) { 447 colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; 448 } else { 449 colors[i1++] = l; 450 } 451 } 452 } 453 ncolors = nc + 2*s*tc; 454 } else { 455 i1 = 0; 456 for (i=xs; i<xs+nx; i++) { 457 for (l=0; l<nc; l++) { 458 colors[i1++] = l + nc*(i % col); 459 } 460 } 461 ncolors = nc + nc*(col-1); 462 } 463 ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 464 } 465 *coloring = dd->localcoloring; 466 } else if (ctype == IS_COLORING_LOCAL) { 467 if (!dd->ghostedcoloring) { 468 ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr); 469 i1 = 0; 470 for (i=gxs; i<gxs+gnx; i++) { 471 for (l=0; l<nc; l++) { 472 /* the complicated stuff is to handle periodic boundaries */ 473 colors[i1++] = l + nc*(SetInRange(i,m) % col); 474 } 475 } 476 ncolors = nc + nc*(col-1); 477 ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 478 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 479 } 480 *coloring = dd->ghostedcoloring; 481 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 482 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 487 { 488 PetscErrorCode ierr; 489 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc; 490 PetscInt ncolors; 491 MPI_Comm comm; 492 DMBoundaryType bx,by; 493 ISColoringValue *colors; 494 DM_DA *dd = (DM_DA*)da->data; 495 496 PetscFunctionBegin; 497 /* 498 nc - number of components per grid point 499 col - number of colors needed in one direction for single component problem 500 501 */ 502 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr); 503 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 504 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 505 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 506 /* create the coloring */ 507 if (ctype == IS_COLORING_GLOBAL) { 508 if (!dd->localcoloring) { 509 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 510 ii = 0; 511 for (j=ys; j<ys+ny; j++) { 512 for (i=xs; i<xs+nx; i++) { 513 for (k=0; k<nc; k++) { 514 colors[ii++] = k + nc*((3*j+i) % 5); 515 } 516 } 517 } 518 ncolors = 5*nc; 519 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 520 } 521 *coloring = dd->localcoloring; 522 } else if (ctype == IS_COLORING_LOCAL) { 523 if (!dd->ghostedcoloring) { 524 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 525 ii = 0; 526 for (j=gys; j<gys+gny; j++) { 527 for (i=gxs; i<gxs+gnx; i++) { 528 for (k=0; k<nc; k++) { 529 colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); 530 } 531 } 532 } 533 ncolors = 5*nc; 534 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 535 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 536 } 537 *coloring = dd->ghostedcoloring; 538 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 539 PetscFunctionReturn(0); 540 } 541 542 /* =========================================================================== */ 543 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool); 544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 545 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool); 546 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool); 547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 548 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool); 549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 550 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 551 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 552 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 553 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 555 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 556 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 557 558 /*@C 559 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 560 561 Logically Collective on mat 562 563 Input Parameters: 564 + mat - the matrix 565 - da - the da 566 567 Level: intermediate 568 569 @*/ 570 PetscErrorCode MatSetupDM(Mat mat,DM da) 571 { 572 PetscErrorCode ierr; 573 574 PetscFunctionBegin; 575 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 576 PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA); 577 ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 582 { 583 DM da; 584 PetscErrorCode ierr; 585 const char *prefix; 586 Mat Anatural; 587 AO ao; 588 PetscInt rstart,rend,*petsc,i; 589 IS is; 590 MPI_Comm comm; 591 PetscViewerFormat format; 592 593 PetscFunctionBegin; 594 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 597 598 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 599 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 600 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 601 602 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 603 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 604 ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr); 605 for (i=rstart; i<rend; i++) petsc[i-rstart] = i; 606 ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr); 607 ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 608 609 /* call viewer on natural ordering */ 610 ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr); 611 ierr = ISDestroy(&is);CHKERRQ(ierr); 612 ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr); 613 ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr); 614 ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr); 615 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 616 ierr = MatView(Anatural,viewer);CHKERRQ(ierr); 617 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 618 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 622 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 623 { 624 DM da; 625 PetscErrorCode ierr; 626 Mat Anatural,Aapp; 627 AO ao; 628 PetscInt rstart,rend,*app,i,m,n,M,N; 629 IS is; 630 MPI_Comm comm; 631 632 PetscFunctionBegin; 633 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 634 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 635 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 636 637 /* Load the matrix in natural ordering */ 638 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr); 639 ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr); 640 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 641 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 642 ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr); 643 ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr); 644 645 /* Map natural ordering to application ordering and create IS */ 646 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 647 ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr); 648 ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr); 649 for (i=rstart; i<rend; i++) app[i-rstart] = i; 650 ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr); 651 ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 652 653 /* Do permutation and replace header */ 654 ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr); 655 ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr); 656 ierr = ISDestroy(&is);CHKERRQ(ierr); 657 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 662 { 663 PetscErrorCode ierr; 664 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 665 Mat A; 666 MPI_Comm comm; 667 MatType Atype; 668 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 669 MatType mtype; 670 PetscMPIInt size; 671 DM_DA *dd = (DM_DA*)da->data; 672 673 PetscFunctionBegin; 674 ierr = MatInitializePackage();CHKERRQ(ierr); 675 mtype = da->mattype; 676 677 /* 678 m 679 ------------------------------------------------------ 680 | | 681 | | 682 | ---------------------- | 683 | | | | 684 n | ny | | | 685 | | | | 686 | .--------------------- | 687 | (xs,ys) nx | 688 | . | 689 | (gxs,gys) | 690 | | 691 ----------------------------------------------------- 692 */ 693 694 /* 695 nc - number of components per grid point 696 col - number of colors needed in one direction for single component problem 697 698 */ 699 M = dd->M; 700 N = dd->N; 701 P = dd->P; 702 dim = da->dim; 703 dof = dd->w; 704 /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */ 705 ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr); 706 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 707 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 708 ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr); 709 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 710 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 711 ierr = MatSetDM(A,da);CHKERRQ(ierr); 712 if (da->structure_only) { 713 ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr); 714 } 715 ierr = MatGetType(A,&Atype);CHKERRQ(ierr); 716 /* 717 We do not provide a getmatrix function in the DMDA operations because 718 the basic DMDA does not know about matrices. We think of DMDA as being more 719 more low-level than matrices. This is kind of cheating but, cause sometimes 720 we think of DMDA has higher level than matrices. 721 722 We could switch based on Atype (or mtype), but we do not since the 723 specialized setting routines depend only on the particular preallocation 724 details of the matrix, not the type itself. 725 */ 726 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 727 if (!aij) { 728 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 729 } 730 if (!aij) { 731 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 732 if (!baij) { 733 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 734 } 735 if (!baij) { 736 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 737 if (!sbaij) { 738 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 739 } 740 if (!sbaij) { 741 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr); 742 if (!sell) { 743 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr); 744 } 745 } 746 if (!sell) { 747 ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr); 748 } 749 } 750 } 751 if (aij) { 752 if (dim == 1) { 753 if (dd->ofill) { 754 ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 755 } else { 756 DMBoundaryType bx; 757 PetscMPIInt size; 758 ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 759 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr); 760 if (size == 1 && bx == DM_BOUNDARY_NONE) { 761 ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr); 762 } else { 763 ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 764 } 765 } 766 } else if (dim == 2) { 767 if (dd->ofill) { 768 ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 769 } else { 770 ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 771 } 772 } else if (dim == 3) { 773 if (dd->ofill) { 774 ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 775 } else { 776 ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 777 } 778 } 779 } else if (baij) { 780 if (dim == 2) { 781 ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr); 782 } else if (dim == 3) { 783 ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr); 784 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 785 } else if (sbaij) { 786 if (dim == 2) { 787 ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr); 788 } else if (dim == 3) { 789 ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr); 790 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 791 } else if (sell) { 792 if (dim == 2) { 793 ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr); 794 } else if (dim == 3) { 795 ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr); 796 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 797 } else if (is) { 798 ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr); 799 } else { 800 ISLocalToGlobalMapping ltog; 801 802 ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr); 803 ierr = MatSetUp(A);CHKERRQ(ierr); 804 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 805 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr); 806 } 807 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr); 808 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr); 809 ierr = MatSetDM(A,da);CHKERRQ(ierr); 810 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 811 if (size > 1) { 812 /* change viewer to display matrix in natural ordering */ 813 ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr); 814 ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr); 815 } 816 *J = A; 817 PetscFunctionReturn(0); 818 } 819 820 /* ---------------------------------------------------------------------------------*/ 821 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 822 823 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 824 { 825 DM_DA *da = (DM_DA*)dm->data; 826 Mat lJ; 827 ISLocalToGlobalMapping ltog; 828 IS is_loc_filt, is_glob; 829 const PetscInt *e_loc,*idx; 830 PetscInt nel,nen,nv,dof,dim,*gidx,nb; 831 PetscBool flg; 832 PetscErrorCode ierr; 833 834 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 835 We need to filter the local indices that are represented through the DMDAGetElements decomposition 836 This is because the size of the local matrices in MATIS is the local size of the l2g map */ 837 PetscFunctionBegin; 838 dof = da->w; 839 dim = dm->dim; 840 841 ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr); 842 843 /* get local elements indices in local DMDA numbering */ 844 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 845 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 846 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 847 848 /* obtain a consistent local ordering for MATIS */ 849 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 850 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 851 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 852 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 853 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 854 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 855 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 856 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 857 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 858 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 859 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 860 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 861 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 862 863 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 864 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 865 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 866 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 867 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 868 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 869 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 870 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 871 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 872 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 873 ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 874 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 875 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 876 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 877 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 878 ierr = PetscFree(gidx);CHKERRQ(ierr); 879 880 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 881 flg = dm->prealloc_only; 882 dm->prealloc_only = PETSC_TRUE; 883 switch (dim) { 884 case 1: 885 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 886 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 888 break; 889 case 2: 890 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 891 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 892 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 893 break; 894 case 3: 895 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 896 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 897 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 898 break; 899 default: 900 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 901 } 902 dm->prealloc_only = flg; 903 PetscFunctionReturn(0); 904 } 905 906 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 907 { 908 PetscErrorCode ierr; 909 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 910 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 911 MPI_Comm comm; 912 PetscScalar *values; 913 DMBoundaryType bx,by; 914 ISLocalToGlobalMapping ltog; 915 DMDAStencilType st; 916 917 PetscFunctionBegin; 918 /* 919 nc - number of components per grid point 920 col - number of colors needed in one direction for single component problem 921 922 */ 923 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 924 col = 2*s + 1; 925 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 926 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 927 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 928 929 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 930 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 931 932 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 933 /* determine the matrix preallocation information */ 934 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 935 for (i=xs; i<xs+nx; i++) { 936 937 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 938 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 939 940 for (j=ys; j<ys+ny; j++) { 941 slot = i - gxs + gnx*(j - gys); 942 943 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 944 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 945 946 cnt = 0; 947 for (k=0; k<nc; k++) { 948 for (l=lstart; l<lend+1; l++) { 949 for (p=pstart; p<pend+1; p++) { 950 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 951 cols[cnt++] = k + nc*(slot + gnx*l + p); 952 } 953 } 954 } 955 rows[k] = k + nc*(slot); 956 } 957 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 958 } 959 } 960 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 961 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 962 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 963 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 964 965 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 966 967 /* 968 For each node in the grid: we get the neighbors in the local (on processor ordering 969 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 970 PETSc ordering. 971 */ 972 if (!da->prealloc_only) { 973 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 974 for (i=xs; i<xs+nx; i++) { 975 976 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 977 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 978 979 for (j=ys; j<ys+ny; j++) { 980 slot = i - gxs + gnx*(j - gys); 981 982 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 983 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 984 985 cnt = 0; 986 for (k=0; k<nc; k++) { 987 for (l=lstart; l<lend+1; l++) { 988 for (p=pstart; p<pend+1; p++) { 989 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 990 cols[cnt++] = k + nc*(slot + gnx*l + p); 991 } 992 } 993 } 994 rows[k] = k + nc*(slot); 995 } 996 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 997 } 998 } 999 ierr = PetscFree(values);CHKERRQ(ierr); 1000 /* do not copy values to GPU since they are all zero and not yet needed there */ 1001 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1002 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1005 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1012 { 1013 PetscErrorCode ierr; 1014 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1015 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1016 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1017 MPI_Comm comm; 1018 PetscScalar *values; 1019 DMBoundaryType bx,by,bz; 1020 ISLocalToGlobalMapping ltog; 1021 DMDAStencilType st; 1022 1023 PetscFunctionBegin; 1024 /* 1025 nc - number of components per grid point 1026 col - number of colors needed in one direction for single component problem 1027 1028 */ 1029 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1030 col = 2*s + 1; 1031 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1032 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1033 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1034 1035 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1036 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1037 1038 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1039 /* determine the matrix preallocation information */ 1040 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1041 for (i=xs; i<xs+nx; i++) { 1042 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1043 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1044 for (j=ys; j<ys+ny; j++) { 1045 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1046 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1047 for (k=zs; k<zs+nz; k++) { 1048 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1049 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1050 1051 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1052 1053 cnt = 0; 1054 for (l=0; l<nc; l++) { 1055 for (ii=istart; ii<iend+1; ii++) { 1056 for (jj=jstart; jj<jend+1; jj++) { 1057 for (kk=kstart; kk<kend+1; kk++) { 1058 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1059 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1060 } 1061 } 1062 } 1063 } 1064 rows[l] = l + nc*(slot); 1065 } 1066 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1067 } 1068 } 1069 } 1070 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1071 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1072 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1073 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1074 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1075 1076 /* 1077 For each node in the grid: we get the neighbors in the local (on processor ordering 1078 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1079 PETSc ordering. 1080 */ 1081 if (!da->prealloc_only) { 1082 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1083 for (i=xs; i<xs+nx; i++) { 1084 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1085 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1086 for (j=ys; j<ys+ny; j++) { 1087 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1088 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1089 for (k=zs; k<zs+nz; k++) { 1090 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1091 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1092 1093 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1094 1095 cnt = 0; 1096 for (l=0; l<nc; l++) { 1097 for (ii=istart; ii<iend+1; ii++) { 1098 for (jj=jstart; jj<jend+1; jj++) { 1099 for (kk=kstart; kk<kend+1; kk++) { 1100 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1101 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1102 } 1103 } 1104 } 1105 } 1106 rows[l] = l + nc*(slot); 1107 } 1108 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1109 } 1110 } 1111 } 1112 ierr = PetscFree(values);CHKERRQ(ierr); 1113 /* do not copy values to GPU since they are all zero and not yet needed there */ 1114 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1115 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1116 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1117 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1118 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1119 } 1120 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1125 { 1126 PetscErrorCode ierr; 1127 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1128 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1129 MPI_Comm comm; 1130 DMBoundaryType bx,by; 1131 ISLocalToGlobalMapping ltog,mltog; 1132 DMDAStencilType st; 1133 PetscBool removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE; 1134 1135 PetscFunctionBegin; 1136 /* 1137 nc - number of components per grid point 1138 col - number of colors needed in one direction for single component problem 1139 1140 */ 1141 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1142 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1143 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1144 } 1145 col = 2*s + 1; 1146 /* 1147 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1148 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1149 */ 1150 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1151 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1152 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1153 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1155 1156 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1157 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1158 1159 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1160 /* determine the matrix preallocation information */ 1161 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1162 for (i=xs; i<xs+nx; i++) { 1163 1164 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1165 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1166 1167 for (j=ys; j<ys+ny; j++) { 1168 slot = i - gxs + gnx*(j - gys); 1169 1170 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1171 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1172 1173 cnt = 0; 1174 for (k=0; k<nc; k++) { 1175 for (l=lstart; l<lend+1; l++) { 1176 for (p=pstart; p<pend+1; p++) { 1177 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1178 cols[cnt++] = k + nc*(slot + gnx*l + p); 1179 } 1180 } 1181 } 1182 rows[k] = k + nc*(slot); 1183 } 1184 if (removedups) { 1185 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1186 } else { 1187 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1188 } 1189 } 1190 } 1191 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1192 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1193 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1194 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1195 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1196 if (!mltog) { 1197 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1198 } 1199 1200 /* 1201 For each node in the grid: we get the neighbors in the local (on processor ordering 1202 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1203 PETSc ordering. 1204 */ 1205 if (!da->prealloc_only) { 1206 for (i=xs; i<xs+nx; i++) { 1207 1208 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1209 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1210 1211 for (j=ys; j<ys+ny; j++) { 1212 slot = i - gxs + gnx*(j - gys); 1213 1214 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1215 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1216 1217 cnt = 0; 1218 for (l=lstart; l<lend+1; l++) { 1219 for (p=pstart; p<pend+1; p++) { 1220 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1221 cols[cnt++] = nc*(slot + gnx*l + p); 1222 for (k=1; k<nc; k++) { 1223 cols[cnt] = 1 + cols[cnt-1];cnt++; 1224 } 1225 } 1226 } 1227 } 1228 for (k=0; k<nc; k++) rows[k] = k + nc*(slot); 1229 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1230 } 1231 } 1232 /* do not copy values to GPU since they are all zero and not yet needed there */ 1233 ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr); 1234 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1235 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1236 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1237 if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);} 1238 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1239 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1240 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1241 } 1242 } 1243 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1248 { 1249 PetscErrorCode ierr; 1250 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1251 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1252 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1253 DM_DA *dd = (DM_DA*)da->data; 1254 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1255 MPI_Comm comm; 1256 DMBoundaryType bx,by; 1257 ISLocalToGlobalMapping ltog; 1258 DMDAStencilType st; 1259 PetscBool removedups = PETSC_FALSE; 1260 1261 PetscFunctionBegin; 1262 /* 1263 nc - number of components per grid point 1264 col - number of colors needed in one direction for single component problem 1265 1266 */ 1267 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1268 col = 2*s + 1; 1269 /* 1270 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1271 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1272 */ 1273 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1274 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1275 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1276 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1278 1279 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1280 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1281 1282 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1283 /* determine the matrix preallocation information */ 1284 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1285 for (i=xs; i<xs+nx; i++) { 1286 1287 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1288 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1289 1290 for (j=ys; j<ys+ny; j++) { 1291 slot = i - gxs + gnx*(j - gys); 1292 1293 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1294 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1295 1296 for (k=0; k<nc; k++) { 1297 cnt = 0; 1298 for (l=lstart; l<lend+1; l++) { 1299 for (p=pstart; p<pend+1; p++) { 1300 if (l || p) { 1301 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1302 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1303 } 1304 } else { 1305 if (dfill) { 1306 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1307 } else { 1308 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1309 } 1310 } 1311 } 1312 } 1313 row = k + nc*(slot); 1314 maxcnt = PetscMax(maxcnt,cnt); 1315 if (removedups) { 1316 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1317 } else { 1318 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1319 } 1320 } 1321 } 1322 } 1323 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1324 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1325 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1326 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1327 1328 /* 1329 For each node in the grid: we get the neighbors in the local (on processor ordering 1330 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1331 PETSc ordering. 1332 */ 1333 if (!da->prealloc_only) { 1334 for (i=xs; i<xs+nx; i++) { 1335 1336 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1337 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1338 1339 for (j=ys; j<ys+ny; j++) { 1340 slot = i - gxs + gnx*(j - gys); 1341 1342 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1343 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1344 1345 for (k=0; k<nc; k++) { 1346 cnt = 0; 1347 for (l=lstart; l<lend+1; l++) { 1348 for (p=pstart; p<pend+1; p++) { 1349 if (l || p) { 1350 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1351 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1352 } 1353 } else { 1354 if (dfill) { 1355 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1356 } else { 1357 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1358 } 1359 } 1360 } 1361 } 1362 row = k + nc*(slot); 1363 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 } 1366 } 1367 /* do not copy values to GPU since they are all zero and not yet needed there */ 1368 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1369 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1370 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1371 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1372 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1373 } 1374 ierr = PetscFree(cols);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 /* ---------------------------------------------------------------------------------*/ 1379 1380 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1381 { 1382 PetscErrorCode ierr; 1383 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1384 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1385 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1386 MPI_Comm comm; 1387 DMBoundaryType bx,by,bz; 1388 ISLocalToGlobalMapping ltog,mltog; 1389 DMDAStencilType st; 1390 PetscBool removedups = PETSC_FALSE; 1391 1392 PetscFunctionBegin; 1393 /* 1394 nc - number of components per grid point 1395 col - number of colors needed in one direction for single component problem 1396 1397 */ 1398 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1399 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1400 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1401 } 1402 col = 2*s + 1; 1403 1404 /* 1405 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1406 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1407 */ 1408 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1409 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1410 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1411 1412 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1413 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1414 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1415 1416 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1417 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1418 1419 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1420 /* determine the matrix preallocation information */ 1421 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1422 for (i=xs; i<xs+nx; i++) { 1423 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1424 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1425 for (j=ys; j<ys+ny; j++) { 1426 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1427 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1428 for (k=zs; k<zs+nz; k++) { 1429 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1430 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1431 1432 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1433 1434 cnt = 0; 1435 for (l=0; l<nc; l++) { 1436 for (ii=istart; ii<iend+1; ii++) { 1437 for (jj=jstart; jj<jend+1; jj++) { 1438 for (kk=kstart; kk<kend+1; kk++) { 1439 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1440 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1441 } 1442 } 1443 } 1444 } 1445 rows[l] = l + nc*(slot); 1446 } 1447 if (removedups) { 1448 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1449 } else { 1450 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1451 } 1452 } 1453 } 1454 } 1455 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1456 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1457 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1458 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1459 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1460 if (!mltog) { 1461 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1462 } 1463 1464 /* 1465 For each node in the grid: we get the neighbors in the local (on processor ordering 1466 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1467 PETSc ordering. 1468 */ 1469 if (!da->prealloc_only) { 1470 for (i=xs; i<xs+nx; i++) { 1471 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1472 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1473 for (j=ys; j<ys+ny; j++) { 1474 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1475 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1476 for (k=zs; k<zs+nz; k++) { 1477 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1478 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1479 1480 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1481 1482 cnt = 0; 1483 for (kk=kstart; kk<kend+1; kk++) { 1484 for (jj=jstart; jj<jend+1; jj++) { 1485 for (ii=istart; ii<iend+1; ii++) { 1486 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1487 cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk); 1488 for (l=1; l<nc; l++) { 1489 cols[cnt] = 1 + cols[cnt-1];cnt++; 1490 } 1491 } 1492 } 1493 } 1494 } 1495 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1496 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1497 } 1498 } 1499 } 1500 /* do not copy values to GPU since they are all zero and not yet needed there */ 1501 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1502 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1505 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1506 } 1507 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1508 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1509 } 1510 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1511 PetscFunctionReturn(0); 1512 } 1513 1514 /* ---------------------------------------------------------------------------------*/ 1515 1516 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1517 { 1518 PetscErrorCode ierr; 1519 DM_DA *dd = (DM_DA*)da->data; 1520 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1521 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1522 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1523 DMBoundaryType bx; 1524 ISLocalToGlobalMapping ltog; 1525 PetscMPIInt rank,size; 1526 1527 PetscFunctionBegin; 1528 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr); 1529 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr); 1530 1531 /* 1532 nc - number of components per grid point 1533 1534 */ 1535 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1536 if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1537 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1538 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1539 1540 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1541 ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr); 1542 1543 /* 1544 note should be smaller for first and last process with no periodic 1545 does not handle dfill 1546 */ 1547 cnt = 0; 1548 /* coupling with process to the left */ 1549 for (i=0; i<s; i++) { 1550 for (j=0; j<nc; j++) { 1551 ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1552 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1553 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1554 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1555 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1556 } 1557 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1558 cnt++; 1559 } 1560 } 1561 for (i=s; i<nx-s; i++) { 1562 for (j=0; j<nc; j++) { 1563 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1564 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1565 cnt++; 1566 } 1567 } 1568 /* coupling with process to the right */ 1569 for (i=nx-s; i<nx; i++) { 1570 for (j=0; j<nc; j++) { 1571 ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1572 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1573 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1574 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1575 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1576 } 1577 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1578 cnt++; 1579 } 1580 } 1581 1582 ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr); 1583 ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr); 1584 ierr = PetscFree2(cols,ocols);CHKERRQ(ierr); 1585 1586 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1587 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1588 1589 /* 1590 For each node in the grid: we get the neighbors in the local (on processor ordering 1591 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1592 PETSc ordering. 1593 */ 1594 if (!da->prealloc_only) { 1595 ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr); 1596 row = xs*nc; 1597 /* coupling with process to the left */ 1598 for (i=xs; i<xs+s; i++) { 1599 for (j=0; j<nc; j++) { 1600 cnt = 0; 1601 if (rank) { 1602 for (l=0; l<s; l++) { 1603 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1604 } 1605 } 1606 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1607 for (l=0; l<s; l++) { 1608 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1609 } 1610 } 1611 if (dfill) { 1612 for (k=dfill[j]; k<dfill[j+1]; k++) { 1613 cols[cnt++] = i*nc + dfill[k]; 1614 } 1615 } else { 1616 for (k=0; k<nc; k++) { 1617 cols[cnt++] = i*nc + k; 1618 } 1619 } 1620 for (l=0; l<s; l++) { 1621 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1622 } 1623 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1624 row++; 1625 } 1626 } 1627 for (i=xs+s; i<xs+nx-s; i++) { 1628 for (j=0; j<nc; j++) { 1629 cnt = 0; 1630 for (l=0; l<s; l++) { 1631 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1632 } 1633 if (dfill) { 1634 for (k=dfill[j]; k<dfill[j+1]; k++) { 1635 cols[cnt++] = i*nc + dfill[k]; 1636 } 1637 } else { 1638 for (k=0; k<nc; k++) { 1639 cols[cnt++] = i*nc + k; 1640 } 1641 } 1642 for (l=0; l<s; l++) { 1643 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1644 } 1645 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1646 row++; 1647 } 1648 } 1649 /* coupling with process to the right */ 1650 for (i=xs+nx-s; i<xs+nx; i++) { 1651 for (j=0; j<nc; j++) { 1652 cnt = 0; 1653 for (l=0; l<s; l++) { 1654 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1655 } 1656 if (dfill) { 1657 for (k=dfill[j]; k<dfill[j+1]; k++) { 1658 cols[cnt++] = i*nc + dfill[k]; 1659 } 1660 } else { 1661 for (k=0; k<nc; k++) { 1662 cols[cnt++] = i*nc + k; 1663 } 1664 } 1665 if (rank < size-1) { 1666 for (l=0; l<s; l++) { 1667 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1668 } 1669 } 1670 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1671 for (l=0; l<s; l++) { 1672 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1673 } 1674 } 1675 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1676 row++; 1677 } 1678 } 1679 ierr = PetscFree(cols);CHKERRQ(ierr); 1680 /* do not copy values to GPU since they are all zero and not yet needed there */ 1681 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1682 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1683 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1685 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1686 } 1687 PetscFunctionReturn(0); 1688 } 1689 1690 /* ---------------------------------------------------------------------------------*/ 1691 1692 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1693 { 1694 PetscErrorCode ierr; 1695 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1696 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1697 PetscInt istart,iend; 1698 DMBoundaryType bx; 1699 ISLocalToGlobalMapping ltog,mltog; 1700 1701 PetscFunctionBegin; 1702 /* 1703 nc - number of components per grid point 1704 col - number of colors needed in one direction for single component problem 1705 1706 */ 1707 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1708 if (!isIS && bx == DM_BOUNDARY_NONE) { 1709 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1710 } 1711 col = 2*s + 1; 1712 1713 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1714 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1715 1716 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1717 ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr); 1718 ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr); 1719 1720 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1721 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1722 if (!mltog) { 1723 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1724 } 1725 1726 /* 1727 For each node in the grid: we get the neighbors in the local (on processor ordering 1728 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1729 PETSc ordering. 1730 */ 1731 if (!da->prealloc_only) { 1732 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1733 for (i=xs; i<xs+nx; i++) { 1734 istart = PetscMax(-s,gxs - i); 1735 iend = PetscMin(s,gxs + gnx - i - 1); 1736 slot = i - gxs; 1737 1738 cnt = 0; 1739 for (i1=istart; i1<iend+1; i1++) { 1740 cols[cnt++] = nc*(slot + i1); 1741 for (l=1; l<nc; l++) { 1742 cols[cnt] = 1 + cols[cnt-1];cnt++; 1743 } 1744 } 1745 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1746 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1747 } 1748 /* do not copy values to GPU since they are all zero and not yet needed there */ 1749 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1750 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1751 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1752 if (!isIS && bx == DM_BOUNDARY_NONE) { 1753 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1754 } 1755 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1756 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1757 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1758 } 1759 PetscFunctionReturn(0); 1760 } 1761 1762 /* ---------------------------------------------------------------------------------*/ 1763 1764 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS) 1765 { 1766 PetscErrorCode ierr; 1767 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1768 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1769 PetscInt istart,iend; 1770 DMBoundaryType bx; 1771 ISLocalToGlobalMapping ltog,mltog; 1772 1773 PetscFunctionBegin; 1774 /* 1775 nc - number of components per grid point 1776 col - number of colors needed in one direction for single component problem 1777 */ 1778 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1779 col = 2*s + 1; 1780 1781 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1782 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1783 1784 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1785 ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr); 1786 1787 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1788 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1789 if (!mltog) { 1790 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1791 } 1792 1793 /* 1794 For each node in the grid: we get the neighbors in the local (on processor ordering 1795 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1796 PETSc ordering. 1797 */ 1798 if (!da->prealloc_only) { 1799 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1800 for (i=xs; i<xs+nx; i++) { 1801 istart = PetscMax(-s,gxs - i); 1802 iend = PetscMin(s,gxs + gnx - i - 1); 1803 slot = i - gxs; 1804 1805 cnt = 0; 1806 for (i1=istart; i1<iend+1; i1++) { 1807 cols[cnt++] = nc*(slot + i1); 1808 for (l=1; l<nc; l++) { 1809 cols[cnt] = 1 + cols[cnt-1];cnt++; 1810 } 1811 } 1812 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1813 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1814 } 1815 /* do not copy values to GPU since they are all zero and not yet needed there */ 1816 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1817 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1818 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 if (!isIS && bx == DM_BOUNDARY_NONE) { 1820 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1821 } 1822 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1823 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1824 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1825 } 1826 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1827 PetscFunctionReturn(0); 1828 } 1829 1830 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1831 { 1832 PetscErrorCode ierr; 1833 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1834 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1835 PetscInt istart,iend,jstart,jend,ii,jj; 1836 MPI_Comm comm; 1837 PetscScalar *values; 1838 DMBoundaryType bx,by; 1839 DMDAStencilType st; 1840 ISLocalToGlobalMapping ltog; 1841 1842 PetscFunctionBegin; 1843 /* 1844 nc - number of components per grid point 1845 col - number of colors needed in one direction for single component problem 1846 */ 1847 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1848 col = 2*s + 1; 1849 1850 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1851 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1852 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1853 1854 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 1855 1856 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1857 1858 /* determine the matrix preallocation information */ 1859 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 1860 for (i=xs; i<xs+nx; i++) { 1861 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1862 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1863 for (j=ys; j<ys+ny; j++) { 1864 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1865 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1866 slot = i - gxs + gnx*(j - gys); 1867 1868 /* Find block columns in block row */ 1869 cnt = 0; 1870 for (ii=istart; ii<iend+1; ii++) { 1871 for (jj=jstart; jj<jend+1; jj++) { 1872 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1873 cols[cnt++] = slot + ii + gnx*jj; 1874 } 1875 } 1876 } 1877 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1878 } 1879 } 1880 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1881 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1882 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1883 1884 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1885 1886 /* 1887 For each node in the grid: we get the neighbors in the local (on processor ordering 1888 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1889 PETSc ordering. 1890 */ 1891 if (!da->prealloc_only) { 1892 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1893 for (i=xs; i<xs+nx; i++) { 1894 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1895 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1896 for (j=ys; j<ys+ny; j++) { 1897 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1898 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1899 slot = i - gxs + gnx*(j - gys); 1900 cnt = 0; 1901 for (ii=istart; ii<iend+1; ii++) { 1902 for (jj=jstart; jj<jend+1; jj++) { 1903 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1904 cols[cnt++] = slot + ii + gnx*jj; 1905 } 1906 } 1907 } 1908 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1909 } 1910 } 1911 ierr = PetscFree(values);CHKERRQ(ierr); 1912 /* do not copy values to GPU since they are all zero and not yet needed there */ 1913 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1914 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1915 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1916 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1917 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1918 } 1919 ierr = PetscFree(cols);CHKERRQ(ierr); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1924 { 1925 PetscErrorCode ierr; 1926 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1927 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1928 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1929 MPI_Comm comm; 1930 PetscScalar *values; 1931 DMBoundaryType bx,by,bz; 1932 DMDAStencilType st; 1933 ISLocalToGlobalMapping ltog; 1934 1935 PetscFunctionBegin; 1936 /* 1937 nc - number of components per grid point 1938 col - number of colors needed in one direction for single component problem 1939 1940 */ 1941 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1942 col = 2*s + 1; 1943 1944 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1945 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1946 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1947 1948 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 1949 1950 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1951 1952 /* determine the matrix preallocation information */ 1953 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1954 for (i=xs; i<xs+nx; i++) { 1955 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1956 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1957 for (j=ys; j<ys+ny; j++) { 1958 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1959 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1960 for (k=zs; k<zs+nz; k++) { 1961 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1962 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1963 1964 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1965 1966 /* Find block columns in block row */ 1967 cnt = 0; 1968 for (ii=istart; ii<iend+1; ii++) { 1969 for (jj=jstart; jj<jend+1; jj++) { 1970 for (kk=kstart; kk<kend+1; kk++) { 1971 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1972 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1973 } 1974 } 1975 } 1976 } 1977 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1978 } 1979 } 1980 } 1981 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1982 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1983 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1984 1985 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1986 1987 /* 1988 For each node in the grid: we get the neighbors in the local (on processor ordering 1989 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1990 PETSc ordering. 1991 */ 1992 if (!da->prealloc_only) { 1993 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 1994 for (i=xs; i<xs+nx; i++) { 1995 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1996 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1997 for (j=ys; j<ys+ny; j++) { 1998 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1999 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2000 for (k=zs; k<zs+nz; k++) { 2001 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2002 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2003 2004 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2005 2006 cnt = 0; 2007 for (ii=istart; ii<iend+1; ii++) { 2008 for (jj=jstart; jj<jend+1; jj++) { 2009 for (kk=kstart; kk<kend+1; kk++) { 2010 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2011 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2012 } 2013 } 2014 } 2015 } 2016 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2017 } 2018 } 2019 } 2020 ierr = PetscFree(values);CHKERRQ(ierr); 2021 /* do not copy values to GPU since they are all zero and not yet needed there */ 2022 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2023 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2024 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2025 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2026 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2027 } 2028 ierr = PetscFree(cols);CHKERRQ(ierr); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 /* 2033 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2034 identify in the local ordering with periodic domain. 2035 */ 2036 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2037 { 2038 PetscErrorCode ierr; 2039 PetscInt i,n; 2040 2041 PetscFunctionBegin; 2042 ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr); 2043 ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr); 2044 for (i=0,n=0; i<*cnt; i++) { 2045 if (col[i] >= *row) col[n++] = col[i]; 2046 } 2047 *cnt = n; 2048 PetscFunctionReturn(0); 2049 } 2050 2051 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2052 { 2053 PetscErrorCode ierr; 2054 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2055 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2056 PetscInt istart,iend,jstart,jend,ii,jj; 2057 MPI_Comm comm; 2058 PetscScalar *values; 2059 DMBoundaryType bx,by; 2060 DMDAStencilType st; 2061 ISLocalToGlobalMapping ltog; 2062 2063 PetscFunctionBegin; 2064 /* 2065 nc - number of components per grid point 2066 col - number of colors needed in one direction for single component problem 2067 */ 2068 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 2069 col = 2*s + 1; 2070 2071 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 2072 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 2073 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2074 2075 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 2076 2077 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2078 2079 /* determine the matrix preallocation information */ 2080 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2081 for (i=xs; i<xs+nx; i++) { 2082 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2083 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2084 for (j=ys; j<ys+ny; j++) { 2085 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2086 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2087 slot = i - gxs + gnx*(j - gys); 2088 2089 /* Find block columns in block row */ 2090 cnt = 0; 2091 for (ii=istart; ii<iend+1; ii++) { 2092 for (jj=jstart; jj<jend+1; jj++) { 2093 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2094 cols[cnt++] = slot + ii + gnx*jj; 2095 } 2096 } 2097 } 2098 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2099 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2100 } 2101 } 2102 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2103 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2104 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2105 2106 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2107 2108 /* 2109 For each node in the grid: we get the neighbors in the local (on processor ordering 2110 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2111 PETSc ordering. 2112 */ 2113 if (!da->prealloc_only) { 2114 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 2115 for (i=xs; i<xs+nx; i++) { 2116 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2117 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2118 for (j=ys; j<ys+ny; j++) { 2119 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2120 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2121 slot = i - gxs + gnx*(j - gys); 2122 2123 /* Find block columns in block row */ 2124 cnt = 0; 2125 for (ii=istart; ii<iend+1; ii++) { 2126 for (jj=jstart; jj<jend+1; jj++) { 2127 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2128 cols[cnt++] = slot + ii + gnx*jj; 2129 } 2130 } 2131 } 2132 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2133 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2134 } 2135 } 2136 ierr = PetscFree(values);CHKERRQ(ierr); 2137 /* do not copy values to GPU since they are all zero and not yet needed there */ 2138 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2139 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2140 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2141 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2142 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2143 } 2144 ierr = PetscFree(cols);CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2149 { 2150 PetscErrorCode ierr; 2151 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2152 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2153 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2154 MPI_Comm comm; 2155 PetscScalar *values; 2156 DMBoundaryType bx,by,bz; 2157 DMDAStencilType st; 2158 ISLocalToGlobalMapping ltog; 2159 2160 PetscFunctionBegin; 2161 /* 2162 nc - number of components per grid point 2163 col - number of colors needed in one direction for single component problem 2164 */ 2165 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2166 col = 2*s + 1; 2167 2168 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2169 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2170 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2171 2172 /* create the matrix */ 2173 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 2174 2175 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2176 2177 /* determine the matrix preallocation information */ 2178 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2179 for (i=xs; i<xs+nx; i++) { 2180 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2181 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2182 for (j=ys; j<ys+ny; j++) { 2183 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2184 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2185 for (k=zs; k<zs+nz; k++) { 2186 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2187 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2188 2189 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2190 2191 /* Find block columns in block row */ 2192 cnt = 0; 2193 for (ii=istart; ii<iend+1; ii++) { 2194 for (jj=jstart; jj<jend+1; jj++) { 2195 for (kk=kstart; kk<kend+1; kk++) { 2196 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2197 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2198 } 2199 } 2200 } 2201 } 2202 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2203 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2204 } 2205 } 2206 } 2207 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2208 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2209 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2210 2211 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2212 2213 /* 2214 For each node in the grid: we get the neighbors in the local (on processor ordering 2215 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2216 PETSc ordering. 2217 */ 2218 if (!da->prealloc_only) { 2219 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2220 for (i=xs; i<xs+nx; i++) { 2221 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2222 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2223 for (j=ys; j<ys+ny; j++) { 2224 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2225 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2226 for (k=zs; k<zs+nz; k++) { 2227 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2228 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2229 2230 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2231 2232 cnt = 0; 2233 for (ii=istart; ii<iend+1; ii++) { 2234 for (jj=jstart; jj<jend+1; jj++) { 2235 for (kk=kstart; kk<kend+1; kk++) { 2236 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2237 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2238 } 2239 } 2240 } 2241 } 2242 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2243 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2244 } 2245 } 2246 } 2247 ierr = PetscFree(values);CHKERRQ(ierr); 2248 /* do not copy values to GPU since they are all zero and not yet needed there */ 2249 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2250 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2252 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2253 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2254 } 2255 ierr = PetscFree(cols);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 /* ---------------------------------------------------------------------------------*/ 2260 2261 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2262 { 2263 PetscErrorCode ierr; 2264 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2265 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2266 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2267 DM_DA *dd = (DM_DA*)da->data; 2268 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2269 MPI_Comm comm; 2270 PetscScalar *values; 2271 DMBoundaryType bx,by,bz; 2272 ISLocalToGlobalMapping ltog; 2273 DMDAStencilType st; 2274 PetscBool removedups = PETSC_FALSE; 2275 2276 PetscFunctionBegin; 2277 /* 2278 nc - number of components per grid point 2279 col - number of colors needed in one direction for single component problem 2280 2281 */ 2282 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2283 col = 2*s + 1; 2284 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2285 by 2*stencil_width + 1\n"); 2286 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2287 by 2*stencil_width + 1\n"); 2288 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2289 by 2*stencil_width + 1\n"); 2290 2291 /* 2292 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2293 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2294 */ 2295 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2296 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2297 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2298 2299 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2300 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2301 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2302 2303 ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr); 2304 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2305 2306 /* determine the matrix preallocation information */ 2307 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2308 2309 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 2310 for (i=xs; i<xs+nx; i++) { 2311 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2312 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2313 for (j=ys; j<ys+ny; j++) { 2314 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2315 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2316 for (k=zs; k<zs+nz; k++) { 2317 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2318 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2319 2320 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2321 2322 for (l=0; l<nc; l++) { 2323 cnt = 0; 2324 for (ii=istart; ii<iend+1; ii++) { 2325 for (jj=jstart; jj<jend+1; jj++) { 2326 for (kk=kstart; kk<kend+1; kk++) { 2327 if (ii || jj || kk) { 2328 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2329 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2330 } 2331 } else { 2332 if (dfill) { 2333 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2334 } else { 2335 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2336 } 2337 } 2338 } 2339 } 2340 } 2341 row = l + nc*(slot); 2342 maxcnt = PetscMax(maxcnt,cnt); 2343 if (removedups) { 2344 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2345 } else { 2346 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2347 } 2348 } 2349 } 2350 } 2351 } 2352 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 2353 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 2354 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2355 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2356 2357 /* 2358 For each node in the grid: we get the neighbors in the local (on processor ordering 2359 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2360 PETSc ordering. 2361 */ 2362 if (!da->prealloc_only) { 2363 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 2364 for (i=xs; i<xs+nx; i++) { 2365 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2366 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2367 for (j=ys; j<ys+ny; j++) { 2368 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2369 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2370 for (k=zs; k<zs+nz; k++) { 2371 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2372 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2373 2374 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2375 2376 for (l=0; l<nc; l++) { 2377 cnt = 0; 2378 for (ii=istart; ii<iend+1; ii++) { 2379 for (jj=jstart; jj<jend+1; jj++) { 2380 for (kk=kstart; kk<kend+1; kk++) { 2381 if (ii || jj || kk) { 2382 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2383 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2384 } 2385 } else { 2386 if (dfill) { 2387 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2388 } else { 2389 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2390 } 2391 } 2392 } 2393 } 2394 } 2395 row = l + nc*(slot); 2396 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2397 } 2398 } 2399 } 2400 } 2401 ierr = PetscFree(values);CHKERRQ(ierr); 2402 /* do not copy values to GPU since they are all zero and not yet needed there */ 2403 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2404 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2405 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2406 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2407 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2408 } 2409 ierr = PetscFree(cols);CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412