1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 17 { 18 PetscErrorCode ierr; 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 53 { 54 PetscErrorCode ierr; 55 PetscInt nz; 56 57 PetscFunctionBegin; 58 if (!dfillsparse) PetscFunctionReturn(0); 59 60 /* Determine number of non-zeros */ 61 nz = (dfillsparse[w] - w - 1); 62 63 /* Allocate space for our copy of the given sparse matrix representation. */ 64 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 65 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 71 { 72 PetscErrorCode ierr; 73 PetscInt i,k,cnt = 1; 74 75 PetscFunctionBegin; 76 77 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 78 columns to the left with any nonzeros in them plus 1 */ 79 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 80 for (i=0; i<dd->w; i++) { 81 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 82 } 83 for (i=0; i<dd->w; i++) { 84 if (dd->ofillcols[i]) { 85 dd->ofillcols[i] = cnt++; 86 } 87 } 88 PetscFunctionReturn(0); 89 } 90 91 92 93 /*@ 94 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 95 of the matrix returned by DMCreateMatrix(). 96 97 Logically Collective on da 98 99 Input Parameter: 100 + da - the distributed array 101 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 102 - ofill - the fill pattern in the off-diagonal blocks 103 104 105 Level: developer 106 107 Notes: 108 This only makes sense when you are doing multicomponent problems but using the 109 MPIAIJ matrix format 110 111 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 112 representing coupling and 0 entries for missing coupling. For example 113 $ dfill[9] = {1, 0, 0, 114 $ 1, 1, 0, 115 $ 0, 1, 1} 116 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 117 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 118 diagonal block). 119 120 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 121 can be represented in the dfill, ofill format 122 123 Contributed by Glenn Hammond 124 125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 126 127 @*/ 128 PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill) 129 { 130 DM_DA *dd = (DM_DA*)da->data; 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 /* save the given dfill and ofill information */ 135 ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr); 136 ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr); 137 138 /* count nonzeros in ofill columns */ 139 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 140 141 PetscFunctionReturn(0); 142 } 143 144 145 /*@ 146 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 147 of the matrix returned by DMCreateMatrix(), using sparse representations 148 of fill patterns. 149 150 Logically Collective on da 151 152 Input Parameter: 153 + da - the distributed array 154 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 155 - ofill - the sparse fill pattern in the off-diagonal blocks 156 157 158 Level: developer 159 160 Notes: This only makes sense when you are doing multicomponent problems but using the 161 MPIAIJ matrix format 162 163 The format for dfill and ofill is a sparse representation of a 164 dof-by-dof matrix with 1 entries representing coupling and 0 entries 165 for missing coupling. The sparse representation is a 1 dimensional 166 array of length nz + dof + 1, where nz is the number of non-zeros in 167 the matrix. The first dof entries in the array give the 168 starting array indices of each row's items in the rest of the array, 169 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 170 and the remaining nz items give the column indices of each of 171 the 1s within the logical 2D matrix. Each row's items within 172 the array are the column indices of the 1s within that row 173 of the 2D matrix. PETSc developers may recognize that this is the 174 same format as that computed by the DMDASetBlockFills_Private() 175 function from a dense 2D matrix representation. 176 177 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 178 can be represented in the dfill, ofill format 179 180 Contributed by Philip C. Roth 181 182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 183 184 @*/ 185 PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse) 186 { 187 DM_DA *dd = (DM_DA*)da->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 /* save the given dfill and ofill information */ 192 ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr); 193 ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr); 194 195 /* count nonzeros in ofill columns */ 196 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 197 198 PetscFunctionReturn(0); 199 } 200 201 202 PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring) 203 { 204 PetscErrorCode ierr; 205 PetscInt dim,m,n,p,nc; 206 DMBoundaryType bx,by,bz; 207 MPI_Comm comm; 208 PetscMPIInt size; 209 PetscBool isBAIJ; 210 DM_DA *dd = (DM_DA*)da->data; 211 212 PetscFunctionBegin; 213 /* 214 m 215 ------------------------------------------------------ 216 | | 217 | | 218 | ---------------------- | 219 | | | | 220 n | yn | | | 221 | | | | 222 | .--------------------- | 223 | (xs,ys) xn | 224 | . | 225 | (gxs,gys) | 226 | | 227 ----------------------------------------------------- 228 */ 229 230 /* 231 nc - number of components per grid point 232 col - number of colors needed in one direction for single component problem 233 234 */ 235 ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr); 236 237 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 238 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 239 if (ctype == IS_COLORING_LOCAL) { 240 if (size == 1) { 241 ctype = IS_COLORING_GLOBAL; 242 } else if (dim > 1) { 243 if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) { 244 SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 245 } 246 } 247 } 248 249 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 250 matrices is for the blocks, not the individual matrix elements */ 251 ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr); 252 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);} 253 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);} 254 if (isBAIJ) { 255 dd->w = 1; 256 dd->xs = dd->xs/nc; 257 dd->xe = dd->xe/nc; 258 dd->Xs = dd->Xs/nc; 259 dd->Xe = dd->Xe/nc; 260 } 261 262 /* 263 We do not provide a getcoloring function in the DMDA operations because 264 the basic DMDA does not know about matrices. We think of DMDA as being 265 more low-level then matrices. 266 */ 267 if (dim == 1) { 268 ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 269 } else if (dim == 2) { 270 ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 271 } else if (dim == 3) { 272 ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 273 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); 274 if (isBAIJ) { 275 dd->w = nc; 276 dd->xs = dd->xs*nc; 277 dd->xe = dd->xe*nc; 278 dd->Xs = dd->Xs*nc; 279 dd->Xe = dd->Xe*nc; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 /* ---------------------------------------------------------------------------------*/ 285 286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 287 { 288 PetscErrorCode ierr; 289 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col; 290 PetscInt ncolors; 291 MPI_Comm comm; 292 DMBoundaryType bx,by; 293 DMDAStencilType st; 294 ISColoringValue *colors; 295 DM_DA *dd = (DM_DA*)da->data; 296 297 PetscFunctionBegin; 298 /* 299 nc - number of components per grid point 300 col - number of colors needed in one direction for single component problem 301 302 */ 303 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 304 col = 2*s + 1; 305 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 306 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 307 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 308 309 /* special case as taught to us by Paul Hovland */ 310 if (st == DMDA_STENCIL_STAR && s == 1) { 311 ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 312 } else { 313 314 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\ 315 by 2*stencil_width + 1 (%d)\n", m, col); 316 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\ 317 by 2*stencil_width + 1 (%d)\n", n, col); 318 if (ctype == IS_COLORING_GLOBAL) { 319 if (!dd->localcoloring) { 320 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 321 ii = 0; 322 for (j=ys; j<ys+ny; j++) { 323 for (i=xs; i<xs+nx; i++) { 324 for (k=0; k<nc; k++) { 325 colors[ii++] = k + nc*((i % col) + col*(j % col)); 326 } 327 } 328 } 329 ncolors = nc + nc*(col-1 + col*(col-1)); 330 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 331 } 332 *coloring = dd->localcoloring; 333 } else if (ctype == IS_COLORING_LOCAL) { 334 if (!dd->ghostedcoloring) { 335 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 336 ii = 0; 337 for (j=gys; j<gys+gny; j++) { 338 for (i=gxs; i<gxs+gnx; i++) { 339 for (k=0; k<nc; k++) { 340 /* the complicated stuff is to handle periodic boundaries */ 341 colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col)); 342 } 343 } 344 } 345 ncolors = nc + nc*(col - 1 + col*(col-1)); 346 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 347 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 348 349 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 350 } 351 *coloring = dd->ghostedcoloring; 352 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 353 } 354 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 /* ---------------------------------------------------------------------------------*/ 359 360 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 361 { 362 PetscErrorCode ierr; 363 PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P; 364 PetscInt ncolors; 365 MPI_Comm comm; 366 DMBoundaryType bx,by,bz; 367 DMDAStencilType st; 368 ISColoringValue *colors; 369 DM_DA *dd = (DM_DA*)da->data; 370 371 PetscFunctionBegin; 372 /* 373 nc - number of components per grid point 374 col - number of colors needed in one direction for single component problem 375 376 */ 377 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 378 col = 2*s + 1; 379 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 380 by 2*stencil_width + 1\n"); 381 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 382 by 2*stencil_width + 1\n"); 383 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 384 by 2*stencil_width + 1\n"); 385 386 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 387 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 388 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 389 390 /* create the coloring */ 391 if (ctype == IS_COLORING_GLOBAL) { 392 if (!dd->localcoloring) { 393 ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr); 394 ii = 0; 395 for (k=zs; k<zs+nz; k++) { 396 for (j=ys; j<ys+ny; j++) { 397 for (i=xs; i<xs+nx; i++) { 398 for (l=0; l<nc; l++) { 399 colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col)); 400 } 401 } 402 } 403 } 404 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 405 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 406 } 407 *coloring = dd->localcoloring; 408 } else if (ctype == IS_COLORING_LOCAL) { 409 if (!dd->ghostedcoloring) { 410 ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr); 411 ii = 0; 412 for (k=gzs; k<gzs+gnz; k++) { 413 for (j=gys; j<gys+gny; j++) { 414 for (i=gxs; i<gxs+gnx; i++) { 415 for (l=0; l<nc; l++) { 416 /* the complicated stuff is to handle periodic boundaries */ 417 colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col)); 418 } 419 } 420 } 421 } 422 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 423 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 424 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 425 } 426 *coloring = dd->ghostedcoloring; 427 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 428 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 /* ---------------------------------------------------------------------------------*/ 433 434 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 435 { 436 PetscErrorCode ierr; 437 PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col; 438 PetscInt ncolors; 439 MPI_Comm comm; 440 DMBoundaryType bx; 441 ISColoringValue *colors; 442 DM_DA *dd = (DM_DA*)da->data; 443 444 PetscFunctionBegin; 445 /* 446 nc - number of components per grid point 447 col - number of colors needed in one direction for single component problem 448 449 */ 450 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 451 col = 2*s + 1; 452 453 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\ 454 by 2*stencil_width + 1 %d\n",(int)m,(int)col); 455 456 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 457 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 458 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 459 460 /* create the coloring */ 461 if (ctype == IS_COLORING_GLOBAL) { 462 if (!dd->localcoloring) { 463 ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr); 464 if (dd->ofillcols) { 465 PetscInt tc = 0; 466 for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0); 467 i1 = 0; 468 for (i=xs; i<xs+nx; i++) { 469 for (l=0; l<nc; l++) { 470 if (dd->ofillcols[l] && (i % col)) { 471 colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; 472 } else { 473 colors[i1++] = l; 474 } 475 } 476 } 477 ncolors = nc + 2*s*tc; 478 } else { 479 i1 = 0; 480 for (i=xs; i<xs+nx; i++) { 481 for (l=0; l<nc; l++) { 482 colors[i1++] = l + nc*(i % col); 483 } 484 } 485 ncolors = nc + nc*(col-1); 486 } 487 ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 488 } 489 *coloring = dd->localcoloring; 490 } else if (ctype == IS_COLORING_LOCAL) { 491 if (!dd->ghostedcoloring) { 492 ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr); 493 i1 = 0; 494 for (i=gxs; i<gxs+gnx; i++) { 495 for (l=0; l<nc; l++) { 496 /* the complicated stuff is to handle periodic boundaries */ 497 colors[i1++] = l + nc*(SetInRange(i,m) % col); 498 } 499 } 500 ncolors = nc + nc*(col-1); 501 ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 502 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 503 } 504 *coloring = dd->ghostedcoloring; 505 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 506 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 511 { 512 PetscErrorCode ierr; 513 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc; 514 PetscInt ncolors; 515 MPI_Comm comm; 516 DMBoundaryType bx,by; 517 ISColoringValue *colors; 518 DM_DA *dd = (DM_DA*)da->data; 519 520 PetscFunctionBegin; 521 /* 522 nc - number of components per grid point 523 col - number of colors needed in one direction for single component problem 524 525 */ 526 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr); 527 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 528 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 529 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 530 531 if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n"); 532 if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n"); 533 534 /* create the coloring */ 535 if (ctype == IS_COLORING_GLOBAL) { 536 if (!dd->localcoloring) { 537 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 538 ii = 0; 539 for (j=ys; j<ys+ny; j++) { 540 for (i=xs; i<xs+nx; i++) { 541 for (k=0; k<nc; k++) { 542 colors[ii++] = k + nc*((3*j+i) % 5); 543 } 544 } 545 } 546 ncolors = 5*nc; 547 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 548 } 549 *coloring = dd->localcoloring; 550 } else if (ctype == IS_COLORING_LOCAL) { 551 if (!dd->ghostedcoloring) { 552 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 553 ii = 0; 554 for (j=gys; j<gys+gny; j++) { 555 for (i=gxs; i<gxs+gnx; i++) { 556 for (k=0; k<nc; k++) { 557 colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); 558 } 559 } 560 } 561 ncolors = 5*nc; 562 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 563 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 564 } 565 *coloring = dd->ghostedcoloring; 566 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 567 PetscFunctionReturn(0); 568 } 569 570 /* =========================================================================== */ 571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool); 572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool); 574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool); 576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 584 585 /*@C 586 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 587 588 Logically Collective on mat 589 590 Input Parameters: 591 + mat - the matrix 592 - da - the da 593 594 Level: intermediate 595 596 @*/ 597 PetscErrorCode MatSetupDM(Mat mat,DM da) 598 { 599 PetscErrorCode ierr; 600 601 PetscFunctionBegin; 602 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 603 PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA); 604 ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 609 { 610 DM da; 611 PetscErrorCode ierr; 612 const char *prefix; 613 Mat Anatural; 614 AO ao; 615 PetscInt rstart,rend,*petsc,i; 616 IS is; 617 MPI_Comm comm; 618 PetscViewerFormat format; 619 620 PetscFunctionBegin; 621 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 622 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 623 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 624 625 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 626 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 627 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 628 629 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 630 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 631 ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr); 632 for (i=rstart; i<rend; i++) petsc[i-rstart] = i; 633 ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr); 634 ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 635 636 /* call viewer on natural ordering */ 637 ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr); 638 ierr = ISDestroy(&is);CHKERRQ(ierr); 639 ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr); 640 ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr); 641 ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr); 642 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 643 ierr = MatView(Anatural,viewer);CHKERRQ(ierr); 644 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 645 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 650 { 651 DM da; 652 PetscErrorCode ierr; 653 Mat Anatural,Aapp; 654 AO ao; 655 PetscInt rstart,rend,*app,i,m,n,M,N; 656 IS is; 657 MPI_Comm comm; 658 659 PetscFunctionBegin; 660 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 661 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 662 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 663 664 /* Load the matrix in natural ordering */ 665 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr); 666 ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr); 667 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 668 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 669 ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr); 670 ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr); 671 672 /* Map natural ordering to application ordering and create IS */ 673 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 674 ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr); 675 ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr); 676 for (i=rstart; i<rend; i++) app[i-rstart] = i; 677 ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr); 678 ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 679 680 /* Do permutation and replace header */ 681 ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr); 682 ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr); 683 ierr = ISDestroy(&is);CHKERRQ(ierr); 684 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 689 { 690 PetscErrorCode ierr; 691 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 692 Mat A; 693 MPI_Comm comm; 694 MatType Atype; 695 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 696 MatType mtype; 697 PetscMPIInt size; 698 DM_DA *dd = (DM_DA*)da->data; 699 700 PetscFunctionBegin; 701 ierr = MatInitializePackage();CHKERRQ(ierr); 702 mtype = da->mattype; 703 704 /* 705 m 706 ------------------------------------------------------ 707 | | 708 | | 709 | ---------------------- | 710 | | | | 711 n | ny | | | 712 | | | | 713 | .--------------------- | 714 | (xs,ys) nx | 715 | . | 716 | (gxs,gys) | 717 | | 718 ----------------------------------------------------- 719 */ 720 721 /* 722 nc - number of components per grid point 723 col - number of colors needed in one direction for single component problem 724 725 */ 726 M = dd->M; 727 N = dd->N; 728 P = dd->P; 729 dim = da->dim; 730 dof = dd->w; 731 /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */ 732 ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr); 733 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 734 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 735 ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr); 736 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 737 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 738 ierr = MatSetDM(A,da);CHKERRQ(ierr); 739 if (da->structure_only) { 740 ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr); 741 } 742 ierr = MatGetType(A,&Atype);CHKERRQ(ierr); 743 /* 744 We do not provide a getmatrix function in the DMDA operations because 745 the basic DMDA does not know about matrices. We think of DMDA as being more 746 more low-level than matrices. This is kind of cheating but, cause sometimes 747 we think of DMDA has higher level than matrices. 748 749 We could switch based on Atype (or mtype), but we do not since the 750 specialized setting routines depend only on the particular preallocation 751 details of the matrix, not the type itself. 752 */ 753 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 754 if (!aij) { 755 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 756 } 757 if (!aij) { 758 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 759 if (!baij) { 760 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 761 } 762 if (!baij) { 763 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 764 if (!sbaij) { 765 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 766 } 767 if (!sbaij) { 768 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr); 769 if (!sell) { 770 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr); 771 } 772 } 773 if (!sell) { 774 ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr); 775 } 776 } 777 } 778 if (aij) { 779 if (dim == 1) { 780 if (dd->ofill) { 781 ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 782 } else { 783 ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 784 } 785 } else if (dim == 2) { 786 if (dd->ofill) { 787 ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 788 } else { 789 ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 790 } 791 } else if (dim == 3) { 792 if (dd->ofill) { 793 ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 794 } else { 795 ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 796 } 797 } 798 } else if (baij) { 799 if (dim == 2) { 800 ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr); 801 } else if (dim == 3) { 802 ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr); 803 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 804 } else if (sbaij) { 805 if (dim == 2) { 806 ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr); 807 } else if (dim == 3) { 808 ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr); 809 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 810 } else if (sell) { 811 if (dim == 2) { 812 ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr); 813 } else if (dim == 3) { 814 ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr); 815 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 816 } else if (is) { 817 ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr); 818 } else { 819 ISLocalToGlobalMapping ltog; 820 821 ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr); 822 ierr = MatSetUp(A);CHKERRQ(ierr); 823 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 824 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr); 825 } 826 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr); 827 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr); 828 ierr = MatSetDM(A,da);CHKERRQ(ierr); 829 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 830 if (size > 1) { 831 /* change viewer to display matrix in natural ordering */ 832 ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr); 833 ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr); 834 } 835 *J = A; 836 PetscFunctionReturn(0); 837 } 838 839 /* ---------------------------------------------------------------------------------*/ 840 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 841 842 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 843 { 844 DM_DA *da = (DM_DA*)dm->data; 845 Mat lJ; 846 ISLocalToGlobalMapping ltog; 847 IS is_loc_filt, is_glob; 848 const PetscInt *e_loc,*idx; 849 PetscInt nel,nen,nv,dof,dim,*gidx,nb; 850 PetscBool flg; 851 PetscErrorCode ierr; 852 853 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 854 We need to filter the local indices that are represented through the DMDAGetElements decomposition 855 This is because the size of the local matrices in MATIS is the local size of the l2g map */ 856 PetscFunctionBegin; 857 dof = da->w; 858 dim = dm->dim; 859 860 ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr); 861 862 /* get local elements indices in local DMDA numbering */ 863 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 864 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 865 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 866 867 /* obtain a consistent local ordering for MATIS */ 868 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 869 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 870 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 871 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 872 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 873 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 874 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 875 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 876 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 877 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 878 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 879 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 880 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 881 882 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 883 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 884 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 885 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 886 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 887 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 888 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 889 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 890 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 891 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 892 ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 893 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 894 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 895 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 896 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 897 ierr = PetscFree(gidx);CHKERRQ(ierr); 898 899 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 900 flg = dm->prealloc_only; 901 dm->prealloc_only = PETSC_TRUE; 902 switch (dim) { 903 case 1: 904 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 905 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 907 break; 908 case 2: 909 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 910 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 912 break; 913 case 3: 914 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 915 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 917 break; 918 default: 919 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 920 break; 921 } 922 dm->prealloc_only = flg; 923 PetscFunctionReturn(0); 924 } 925 926 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 927 { 928 PetscErrorCode ierr; 929 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 930 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 931 MPI_Comm comm; 932 PetscScalar *values; 933 DMBoundaryType bx,by; 934 ISLocalToGlobalMapping ltog; 935 DMDAStencilType st; 936 937 PetscFunctionBegin; 938 /* 939 nc - number of components per grid point 940 col - number of colors needed in one direction for single component problem 941 942 */ 943 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 944 col = 2*s + 1; 945 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 946 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 947 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 948 949 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 950 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 951 952 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 953 /* determine the matrix preallocation information */ 954 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 955 for (i=xs; i<xs+nx; i++) { 956 957 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 958 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 959 960 for (j=ys; j<ys+ny; j++) { 961 slot = i - gxs + gnx*(j - gys); 962 963 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 964 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 965 966 cnt = 0; 967 for (k=0; k<nc; k++) { 968 for (l=lstart; l<lend+1; l++) { 969 for (p=pstart; p<pend+1; p++) { 970 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 971 cols[cnt++] = k + nc*(slot + gnx*l + p); 972 } 973 } 974 } 975 rows[k] = k + nc*(slot); 976 } 977 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 978 } 979 } 980 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 981 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 982 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 983 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 984 985 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 986 987 /* 988 For each node in the grid: we get the neighbors in the local (on processor ordering 989 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 990 PETSc ordering. 991 */ 992 if (!da->prealloc_only) { 993 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 994 for (i=xs; i<xs+nx; i++) { 995 996 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 997 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 998 999 for (j=ys; j<ys+ny; j++) { 1000 slot = i - gxs + gnx*(j - gys); 1001 1002 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1003 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1004 1005 cnt = 0; 1006 for (k=0; k<nc; k++) { 1007 for (l=lstart; l<lend+1; l++) { 1008 for (p=pstart; p<pend+1; p++) { 1009 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1010 cols[cnt++] = k + nc*(slot + gnx*l + p); 1011 } 1012 } 1013 } 1014 rows[k] = k + nc*(slot); 1015 } 1016 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1017 } 1018 } 1019 ierr = PetscFree(values);CHKERRQ(ierr); 1020 /* do not copy values to GPU since they are all zero and not yet needed there */ 1021 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1022 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1023 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1024 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1025 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1026 } 1027 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1032 { 1033 PetscErrorCode ierr; 1034 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1035 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1036 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1037 MPI_Comm comm; 1038 PetscScalar *values; 1039 DMBoundaryType bx,by,bz; 1040 ISLocalToGlobalMapping ltog; 1041 DMDAStencilType st; 1042 1043 PetscFunctionBegin; 1044 /* 1045 nc - number of components per grid point 1046 col - number of colors needed in one direction for single component problem 1047 1048 */ 1049 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1050 col = 2*s + 1; 1051 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1052 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1053 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1054 1055 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1056 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1057 1058 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1059 /* determine the matrix preallocation information */ 1060 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1061 for (i=xs; i<xs+nx; i++) { 1062 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1063 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1064 for (j=ys; j<ys+ny; j++) { 1065 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1066 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1067 for (k=zs; k<zs+nz; k++) { 1068 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1069 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1070 1071 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1072 1073 cnt = 0; 1074 for (l=0; l<nc; l++) { 1075 for (ii=istart; ii<iend+1; ii++) { 1076 for (jj=jstart; jj<jend+1; jj++) { 1077 for (kk=kstart; kk<kend+1; kk++) { 1078 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1079 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1080 } 1081 } 1082 } 1083 } 1084 rows[l] = l + nc*(slot); 1085 } 1086 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1087 } 1088 } 1089 } 1090 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1091 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1092 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1093 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1094 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1095 1096 /* 1097 For each node in the grid: we get the neighbors in the local (on processor ordering 1098 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1099 PETSc ordering. 1100 */ 1101 if (!da->prealloc_only) { 1102 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1103 for (i=xs; i<xs+nx; i++) { 1104 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1105 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1106 for (j=ys; j<ys+ny; j++) { 1107 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1108 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1109 for (k=zs; k<zs+nz; k++) { 1110 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1111 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1112 1113 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1114 1115 cnt = 0; 1116 for (l=0; l<nc; l++) { 1117 for (ii=istart; ii<iend+1; ii++) { 1118 for (jj=jstart; jj<jend+1; jj++) { 1119 for (kk=kstart; kk<kend+1; kk++) { 1120 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1121 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1122 } 1123 } 1124 } 1125 } 1126 rows[l] = l + nc*(slot); 1127 } 1128 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1129 } 1130 } 1131 } 1132 ierr = PetscFree(values);CHKERRQ(ierr); 1133 /* do not copy values to GPU since they are all zero and not yet needed there */ 1134 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1135 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1136 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1137 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1138 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1139 } 1140 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1145 { 1146 PetscErrorCode ierr; 1147 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1148 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1149 MPI_Comm comm; 1150 DMBoundaryType bx,by; 1151 ISLocalToGlobalMapping ltog,mltog; 1152 DMDAStencilType st; 1153 PetscBool removedups = PETSC_FALSE; 1154 1155 PetscFunctionBegin; 1156 /* 1157 nc - number of components per grid point 1158 col - number of colors needed in one direction for single component problem 1159 1160 */ 1161 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1162 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1163 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1164 } 1165 col = 2*s + 1; 1166 /* 1167 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1168 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1169 */ 1170 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1171 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1172 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1173 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1175 1176 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1177 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1178 1179 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1180 /* determine the matrix preallocation information */ 1181 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1182 for (i=xs; i<xs+nx; i++) { 1183 1184 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1185 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1186 1187 for (j=ys; j<ys+ny; j++) { 1188 slot = i - gxs + gnx*(j - gys); 1189 1190 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1191 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1192 1193 cnt = 0; 1194 for (k=0; k<nc; k++) { 1195 for (l=lstart; l<lend+1; l++) { 1196 for (p=pstart; p<pend+1; p++) { 1197 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1198 cols[cnt++] = k + nc*(slot + gnx*l + p); 1199 } 1200 } 1201 } 1202 rows[k] = k + nc*(slot); 1203 } 1204 if (removedups) { 1205 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1206 } else { 1207 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1208 } 1209 } 1210 } 1211 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1212 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1213 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1214 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1215 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1216 if (!mltog) { 1217 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1218 } 1219 1220 /* 1221 For each node in the grid: we get the neighbors in the local (on processor ordering 1222 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1223 PETSc ordering. 1224 */ 1225 if (!da->prealloc_only) { 1226 for (i=xs; i<xs+nx; i++) { 1227 1228 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1229 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1230 1231 for (j=ys; j<ys+ny; j++) { 1232 slot = i - gxs + gnx*(j - gys); 1233 1234 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1235 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1236 1237 cnt = 0; 1238 for (l=lstart; l<lend+1; l++) { 1239 for (p=pstart; p<pend+1; p++) { 1240 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1241 cols[cnt++] = nc*(slot + gnx*l + p); 1242 for (k=1; k<nc; k++) { 1243 cols[cnt] = 1 + cols[cnt-1];cnt++; 1244 } 1245 } 1246 } 1247 } 1248 for (k=0; k<nc; k++) rows[k] = k + nc*(slot); 1249 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1250 } 1251 } 1252 /* do not copy values to GPU since they are all zero and not yet needed there */ 1253 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1254 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1255 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1256 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1257 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1258 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1259 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1260 } 1261 } 1262 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1267 { 1268 PetscErrorCode ierr; 1269 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1270 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1271 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1272 DM_DA *dd = (DM_DA*)da->data; 1273 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1274 MPI_Comm comm; 1275 DMBoundaryType bx,by; 1276 ISLocalToGlobalMapping ltog; 1277 DMDAStencilType st; 1278 PetscBool removedups = PETSC_FALSE; 1279 1280 PetscFunctionBegin; 1281 /* 1282 nc - number of components per grid point 1283 col - number of colors needed in one direction for single component problem 1284 1285 */ 1286 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1287 col = 2*s + 1; 1288 /* 1289 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1290 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1291 */ 1292 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1293 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1294 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1295 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1297 1298 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1299 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1300 1301 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1302 /* determine the matrix preallocation information */ 1303 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1304 for (i=xs; i<xs+nx; i++) { 1305 1306 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1307 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1308 1309 for (j=ys; j<ys+ny; j++) { 1310 slot = i - gxs + gnx*(j - gys); 1311 1312 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1313 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1314 1315 for (k=0; k<nc; k++) { 1316 cnt = 0; 1317 for (l=lstart; l<lend+1; l++) { 1318 for (p=pstart; p<pend+1; p++) { 1319 if (l || p) { 1320 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1321 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1322 } 1323 } else { 1324 if (dfill) { 1325 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1326 } else { 1327 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1328 } 1329 } 1330 } 1331 } 1332 row = k + nc*(slot); 1333 maxcnt = PetscMax(maxcnt,cnt); 1334 if (removedups) { 1335 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1336 } else { 1337 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1338 } 1339 } 1340 } 1341 } 1342 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1343 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1344 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1345 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1346 1347 /* 1348 For each node in the grid: we get the neighbors in the local (on processor ordering 1349 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1350 PETSc ordering. 1351 */ 1352 if (!da->prealloc_only) { 1353 for (i=xs; i<xs+nx; i++) { 1354 1355 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1356 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1357 1358 for (j=ys; j<ys+ny; j++) { 1359 slot = i - gxs + gnx*(j - gys); 1360 1361 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1362 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1363 1364 for (k=0; k<nc; k++) { 1365 cnt = 0; 1366 for (l=lstart; l<lend+1; l++) { 1367 for (p=pstart; p<pend+1; p++) { 1368 if (l || p) { 1369 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1370 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1371 } 1372 } else { 1373 if (dfill) { 1374 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1375 } else { 1376 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1377 } 1378 } 1379 } 1380 } 1381 row = k + nc*(slot); 1382 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1383 } 1384 } 1385 } 1386 /* do not copy values to GPU since they are all zero and not yet needed there */ 1387 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1388 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1391 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscFree(cols);CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 /* ---------------------------------------------------------------------------------*/ 1398 1399 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1400 { 1401 PetscErrorCode ierr; 1402 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1403 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1404 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1405 MPI_Comm comm; 1406 DMBoundaryType bx,by,bz; 1407 ISLocalToGlobalMapping ltog,mltog; 1408 DMDAStencilType st; 1409 PetscBool removedups = PETSC_FALSE; 1410 1411 PetscFunctionBegin; 1412 /* 1413 nc - number of components per grid point 1414 col - number of colors needed in one direction for single component problem 1415 1416 */ 1417 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1418 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1419 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1420 } 1421 col = 2*s + 1; 1422 1423 /* 1424 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1425 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1426 */ 1427 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1428 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1429 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1430 1431 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1432 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1433 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1434 1435 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1436 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1437 1438 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1439 /* determine the matrix preallocation information */ 1440 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1441 for (i=xs; i<xs+nx; i++) { 1442 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1443 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1444 for (j=ys; j<ys+ny; j++) { 1445 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1446 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1447 for (k=zs; k<zs+nz; k++) { 1448 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1449 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1450 1451 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1452 1453 cnt = 0; 1454 for (l=0; l<nc; l++) { 1455 for (ii=istart; ii<iend+1; ii++) { 1456 for (jj=jstart; jj<jend+1; jj++) { 1457 for (kk=kstart; kk<kend+1; kk++) { 1458 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1459 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1460 } 1461 } 1462 } 1463 } 1464 rows[l] = l + nc*(slot); 1465 } 1466 if (removedups) { 1467 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1468 } else { 1469 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1470 } 1471 } 1472 } 1473 } 1474 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1475 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1476 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1477 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1478 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1479 if (!mltog) { 1480 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1481 } 1482 1483 /* 1484 For each node in the grid: we get the neighbors in the local (on processor ordering 1485 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1486 PETSc ordering. 1487 */ 1488 if (!da->prealloc_only) { 1489 for (i=xs; i<xs+nx; i++) { 1490 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1491 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1492 for (j=ys; j<ys+ny; j++) { 1493 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1494 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1495 for (k=zs; k<zs+nz; k++) { 1496 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1497 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1498 1499 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1500 1501 cnt = 0; 1502 for (kk=kstart; kk<kend+1; kk++) { 1503 for (jj=jstart; jj<jend+1; jj++) { 1504 for (ii=istart; ii<iend+1; ii++) { 1505 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1506 cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk); 1507 for (l=1; l<nc; l++) { 1508 cols[cnt] = 1 + cols[cnt-1];cnt++; 1509 } 1510 } 1511 } 1512 } 1513 } 1514 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1515 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1516 } 1517 } 1518 } 1519 /* do not copy values to GPU since they are all zero and not yet needed there */ 1520 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1521 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1522 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1523 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1524 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1525 } 1526 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1527 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1528 } 1529 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 /* ---------------------------------------------------------------------------------*/ 1534 1535 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1536 { 1537 PetscErrorCode ierr; 1538 DM_DA *dd = (DM_DA*)da->data; 1539 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1540 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1541 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1542 DMBoundaryType bx; 1543 ISLocalToGlobalMapping ltog; 1544 PetscMPIInt rank,size; 1545 1546 PetscFunctionBegin; 1547 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr); 1548 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr); 1549 1550 /* 1551 nc - number of components per grid point 1552 1553 */ 1554 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1555 if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1556 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1557 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1558 1559 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1560 ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr); 1561 1562 /* 1563 note should be smaller for first and last process with no periodic 1564 does not handle dfill 1565 */ 1566 cnt = 0; 1567 /* coupling with process to the left */ 1568 for (i=0; i<s; i++) { 1569 for (j=0; j<nc; j++) { 1570 ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1571 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1572 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1573 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1574 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1575 } 1576 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1577 cnt++; 1578 } 1579 } 1580 for (i=s; i<nx-s; i++) { 1581 for (j=0; j<nc; j++) { 1582 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1583 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1584 cnt++; 1585 } 1586 } 1587 /* coupling with process to the right */ 1588 for (i=nx-s; i<nx; i++) { 1589 for (j=0; j<nc; j++) { 1590 ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1591 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1592 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1593 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1594 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1595 } 1596 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1597 cnt++; 1598 } 1599 } 1600 1601 ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr); 1602 ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr); 1603 ierr = PetscFree2(cols,ocols);CHKERRQ(ierr); 1604 1605 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1606 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1607 1608 /* 1609 For each node in the grid: we get the neighbors in the local (on processor ordering 1610 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1611 PETSc ordering. 1612 */ 1613 if (!da->prealloc_only) { 1614 ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr); 1615 row = xs*nc; 1616 /* coupling with process to the left */ 1617 for (i=xs; i<xs+s; i++) { 1618 for (j=0; j<nc; j++) { 1619 cnt = 0; 1620 if (rank) { 1621 for (l=0; l<s; l++) { 1622 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1623 } 1624 } 1625 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1626 for (l=0; l<s; l++) { 1627 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1628 } 1629 } 1630 if (dfill) { 1631 for (k=dfill[j]; k<dfill[j+1]; k++) { 1632 cols[cnt++] = i*nc + dfill[k]; 1633 } 1634 } else { 1635 for (k=0; k<nc; k++) { 1636 cols[cnt++] = i*nc + k; 1637 } 1638 } 1639 for (l=0; l<s; l++) { 1640 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1641 } 1642 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1643 row++; 1644 } 1645 } 1646 for (i=xs+s; i<xs+nx-s; i++) { 1647 for (j=0; j<nc; j++) { 1648 cnt = 0; 1649 for (l=0; l<s; l++) { 1650 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1651 } 1652 if (dfill) { 1653 for (k=dfill[j]; k<dfill[j+1]; k++) { 1654 cols[cnt++] = i*nc + dfill[k]; 1655 } 1656 } else { 1657 for (k=0; k<nc; k++) { 1658 cols[cnt++] = i*nc + k; 1659 } 1660 } 1661 for (l=0; l<s; l++) { 1662 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1663 } 1664 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1665 row++; 1666 } 1667 } 1668 /* coupling with process to the right */ 1669 for (i=xs+nx-s; i<xs+nx; i++) { 1670 for (j=0; j<nc; j++) { 1671 cnt = 0; 1672 for (l=0; l<s; l++) { 1673 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1674 } 1675 if (dfill) { 1676 for (k=dfill[j]; k<dfill[j+1]; k++) { 1677 cols[cnt++] = i*nc + dfill[k]; 1678 } 1679 } else { 1680 for (k=0; k<nc; k++) { 1681 cols[cnt++] = i*nc + k; 1682 } 1683 } 1684 if (rank < size-1) { 1685 for (l=0; l<s; l++) { 1686 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1687 } 1688 } 1689 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1690 for (l=0; l<s; l++) { 1691 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1692 } 1693 } 1694 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1695 row++; 1696 } 1697 } 1698 ierr = PetscFree(cols);CHKERRQ(ierr); 1699 /* do not copy values to GPU since they are all zero and not yet needed there */ 1700 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1701 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1702 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1703 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1704 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 /* ---------------------------------------------------------------------------------*/ 1710 1711 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1712 { 1713 PetscErrorCode ierr; 1714 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1715 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1716 PetscInt istart,iend; 1717 DMBoundaryType bx; 1718 ISLocalToGlobalMapping ltog,mltog; 1719 1720 PetscFunctionBegin; 1721 /* 1722 nc - number of components per grid point 1723 col - number of colors needed in one direction for single component problem 1724 1725 */ 1726 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1727 if (!isIS && bx == DM_BOUNDARY_NONE) { 1728 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1729 } 1730 col = 2*s + 1; 1731 1732 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1733 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1734 1735 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1736 ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr); 1737 ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr); 1738 1739 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1740 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1741 if (!mltog) { 1742 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1743 } 1744 1745 /* 1746 For each node in the grid: we get the neighbors in the local (on processor ordering 1747 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1748 PETSc ordering. 1749 */ 1750 if (!da->prealloc_only) { 1751 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1752 for (i=xs; i<xs+nx; i++) { 1753 istart = PetscMax(-s,gxs - i); 1754 iend = PetscMin(s,gxs + gnx - i - 1); 1755 slot = i - gxs; 1756 1757 cnt = 0; 1758 for (i1=istart; i1<iend+1; i1++) { 1759 cols[cnt++] = nc*(slot + i1); 1760 for (l=1; l<nc; l++) { 1761 cols[cnt] = 1 + cols[cnt-1];cnt++; 1762 } 1763 } 1764 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1765 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1766 } 1767 /* do not copy values to GPU since they are all zero and not yet needed there */ 1768 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1769 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1770 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1771 if (!isIS && bx == DM_BOUNDARY_NONE) { 1772 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1773 } 1774 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1775 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1776 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1782 { 1783 PetscErrorCode ierr; 1784 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1785 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1786 PetscInt istart,iend,jstart,jend,ii,jj; 1787 MPI_Comm comm; 1788 PetscScalar *values; 1789 DMBoundaryType bx,by; 1790 DMDAStencilType st; 1791 ISLocalToGlobalMapping ltog; 1792 1793 PetscFunctionBegin; 1794 /* 1795 nc - number of components per grid point 1796 col - number of colors needed in one direction for single component problem 1797 */ 1798 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1799 col = 2*s + 1; 1800 1801 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1802 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1803 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1804 1805 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 1806 1807 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1808 1809 /* determine the matrix preallocation information */ 1810 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 1811 for (i=xs; i<xs+nx; i++) { 1812 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1813 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1814 for (j=ys; j<ys+ny; j++) { 1815 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1816 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1817 slot = i - gxs + gnx*(j - gys); 1818 1819 /* Find block columns in block row */ 1820 cnt = 0; 1821 for (ii=istart; ii<iend+1; ii++) { 1822 for (jj=jstart; jj<jend+1; jj++) { 1823 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1824 cols[cnt++] = slot + ii + gnx*jj; 1825 } 1826 } 1827 } 1828 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1829 } 1830 } 1831 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1832 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1833 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1834 1835 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1836 1837 /* 1838 For each node in the grid: we get the neighbors in the local (on processor ordering 1839 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1840 PETSc ordering. 1841 */ 1842 if (!da->prealloc_only) { 1843 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1844 for (i=xs; i<xs+nx; i++) { 1845 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1846 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1847 for (j=ys; j<ys+ny; j++) { 1848 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1849 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1850 slot = i - gxs + gnx*(j - gys); 1851 cnt = 0; 1852 for (ii=istart; ii<iend+1; ii++) { 1853 for (jj=jstart; jj<jend+1; jj++) { 1854 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1855 cols[cnt++] = slot + ii + gnx*jj; 1856 } 1857 } 1858 } 1859 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1860 } 1861 } 1862 ierr = PetscFree(values);CHKERRQ(ierr); 1863 /* do not copy values to GPU since they are all zero and not yet needed there */ 1864 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1865 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1866 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1867 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1868 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1869 } 1870 ierr = PetscFree(cols);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1875 { 1876 PetscErrorCode ierr; 1877 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1878 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1879 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1880 MPI_Comm comm; 1881 PetscScalar *values; 1882 DMBoundaryType bx,by,bz; 1883 DMDAStencilType st; 1884 ISLocalToGlobalMapping ltog; 1885 1886 PetscFunctionBegin; 1887 /* 1888 nc - number of components per grid point 1889 col - number of colors needed in one direction for single component problem 1890 1891 */ 1892 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1893 col = 2*s + 1; 1894 1895 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1896 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1897 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1898 1899 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 1900 1901 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1902 1903 /* determine the matrix preallocation information */ 1904 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1905 for (i=xs; i<xs+nx; i++) { 1906 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1907 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1908 for (j=ys; j<ys+ny; j++) { 1909 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1910 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1911 for (k=zs; k<zs+nz; k++) { 1912 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1913 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1914 1915 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1916 1917 /* Find block columns in block row */ 1918 cnt = 0; 1919 for (ii=istart; ii<iend+1; ii++) { 1920 for (jj=jstart; jj<jend+1; jj++) { 1921 for (kk=kstart; kk<kend+1; kk++) { 1922 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1923 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1924 } 1925 } 1926 } 1927 } 1928 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1929 } 1930 } 1931 } 1932 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1933 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1934 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1935 1936 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1937 1938 /* 1939 For each node in the grid: we get the neighbors in the local (on processor ordering 1940 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1941 PETSc ordering. 1942 */ 1943 if (!da->prealloc_only) { 1944 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 1945 for (i=xs; i<xs+nx; i++) { 1946 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1947 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1948 for (j=ys; j<ys+ny; j++) { 1949 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1950 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1951 for (k=zs; k<zs+nz; k++) { 1952 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1953 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1954 1955 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1956 1957 cnt = 0; 1958 for (ii=istart; ii<iend+1; ii++) { 1959 for (jj=jstart; jj<jend+1; jj++) { 1960 for (kk=kstart; kk<kend+1; kk++) { 1961 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1962 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1963 } 1964 } 1965 } 1966 } 1967 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1968 } 1969 } 1970 } 1971 ierr = PetscFree(values);CHKERRQ(ierr); 1972 /* do not copy values to GPU since they are all zero and not yet needed there */ 1973 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1974 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1975 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1976 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1977 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1978 } 1979 ierr = PetscFree(cols);CHKERRQ(ierr); 1980 PetscFunctionReturn(0); 1981 } 1982 1983 /* 1984 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1985 identify in the local ordering with periodic domain. 1986 */ 1987 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 1988 { 1989 PetscErrorCode ierr; 1990 PetscInt i,n; 1991 1992 PetscFunctionBegin; 1993 ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr); 1994 ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr); 1995 for (i=0,n=0; i<*cnt; i++) { 1996 if (col[i] >= *row) col[n++] = col[i]; 1997 } 1998 *cnt = n; 1999 PetscFunctionReturn(0); 2000 } 2001 2002 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2003 { 2004 PetscErrorCode ierr; 2005 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2006 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2007 PetscInt istart,iend,jstart,jend,ii,jj; 2008 MPI_Comm comm; 2009 PetscScalar *values; 2010 DMBoundaryType bx,by; 2011 DMDAStencilType st; 2012 ISLocalToGlobalMapping ltog; 2013 2014 PetscFunctionBegin; 2015 /* 2016 nc - number of components per grid point 2017 col - number of colors needed in one direction for single component problem 2018 */ 2019 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 2020 col = 2*s + 1; 2021 2022 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 2023 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 2024 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2025 2026 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 2027 2028 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2029 2030 /* determine the matrix preallocation information */ 2031 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2032 for (i=xs; i<xs+nx; i++) { 2033 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2034 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2035 for (j=ys; j<ys+ny; j++) { 2036 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2037 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2038 slot = i - gxs + gnx*(j - gys); 2039 2040 /* Find block columns in block row */ 2041 cnt = 0; 2042 for (ii=istart; ii<iend+1; ii++) { 2043 for (jj=jstart; jj<jend+1; jj++) { 2044 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2045 cols[cnt++] = slot + ii + gnx*jj; 2046 } 2047 } 2048 } 2049 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2050 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2051 } 2052 } 2053 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2054 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2055 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2056 2057 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2058 2059 /* 2060 For each node in the grid: we get the neighbors in the local (on processor ordering 2061 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2062 PETSc ordering. 2063 */ 2064 if (!da->prealloc_only) { 2065 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 2066 for (i=xs; i<xs+nx; i++) { 2067 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2068 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2069 for (j=ys; j<ys+ny; j++) { 2070 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2071 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2072 slot = i - gxs + gnx*(j - gys); 2073 2074 /* Find block columns in block row */ 2075 cnt = 0; 2076 for (ii=istart; ii<iend+1; ii++) { 2077 for (jj=jstart; jj<jend+1; jj++) { 2078 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2079 cols[cnt++] = slot + ii + gnx*jj; 2080 } 2081 } 2082 } 2083 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2084 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2085 } 2086 } 2087 ierr = PetscFree(values);CHKERRQ(ierr); 2088 /* do not copy values to GPU since they are all zero and not yet needed there */ 2089 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2090 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2093 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2094 } 2095 ierr = PetscFree(cols);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2100 { 2101 PetscErrorCode ierr; 2102 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2103 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2104 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2105 MPI_Comm comm; 2106 PetscScalar *values; 2107 DMBoundaryType bx,by,bz; 2108 DMDAStencilType st; 2109 ISLocalToGlobalMapping ltog; 2110 2111 PetscFunctionBegin; 2112 /* 2113 nc - number of components per grid point 2114 col - number of colors needed in one direction for single component problem 2115 */ 2116 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2117 col = 2*s + 1; 2118 2119 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2120 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2121 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2122 2123 /* create the matrix */ 2124 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 2125 2126 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2127 2128 /* determine the matrix preallocation information */ 2129 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2130 for (i=xs; i<xs+nx; i++) { 2131 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2132 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2133 for (j=ys; j<ys+ny; j++) { 2134 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2135 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2136 for (k=zs; k<zs+nz; k++) { 2137 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2138 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2139 2140 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2141 2142 /* Find block columns in block row */ 2143 cnt = 0; 2144 for (ii=istart; ii<iend+1; ii++) { 2145 for (jj=jstart; jj<jend+1; jj++) { 2146 for (kk=kstart; kk<kend+1; kk++) { 2147 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2148 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2149 } 2150 } 2151 } 2152 } 2153 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2154 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2155 } 2156 } 2157 } 2158 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2159 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2160 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2161 2162 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2163 2164 /* 2165 For each node in the grid: we get the neighbors in the local (on processor ordering 2166 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2167 PETSc ordering. 2168 */ 2169 if (!da->prealloc_only) { 2170 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2171 for (i=xs; i<xs+nx; i++) { 2172 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2173 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2174 for (j=ys; j<ys+ny; j++) { 2175 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2176 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2177 for (k=zs; k<zs+nz; k++) { 2178 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2179 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2180 2181 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2182 2183 cnt = 0; 2184 for (ii=istart; ii<iend+1; ii++) { 2185 for (jj=jstart; jj<jend+1; jj++) { 2186 for (kk=kstart; kk<kend+1; kk++) { 2187 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2188 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2189 } 2190 } 2191 } 2192 } 2193 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2194 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2195 } 2196 } 2197 } 2198 ierr = PetscFree(values);CHKERRQ(ierr); 2199 /* do not copy values to GPU since they are all zero and not yet needed there */ 2200 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2201 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2202 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2203 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2204 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2205 } 2206 ierr = PetscFree(cols);CHKERRQ(ierr); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 /* ---------------------------------------------------------------------------------*/ 2211 2212 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2213 { 2214 PetscErrorCode ierr; 2215 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2216 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2217 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2218 DM_DA *dd = (DM_DA*)da->data; 2219 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2220 MPI_Comm comm; 2221 PetscScalar *values; 2222 DMBoundaryType bx,by,bz; 2223 ISLocalToGlobalMapping ltog; 2224 DMDAStencilType st; 2225 PetscBool removedups = PETSC_FALSE; 2226 2227 PetscFunctionBegin; 2228 /* 2229 nc - number of components per grid point 2230 col - number of colors needed in one direction for single component problem 2231 2232 */ 2233 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2234 col = 2*s + 1; 2235 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2236 by 2*stencil_width + 1\n"); 2237 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2238 by 2*stencil_width + 1\n"); 2239 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2240 by 2*stencil_width + 1\n"); 2241 2242 /* 2243 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2244 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2245 */ 2246 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2247 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2248 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2249 2250 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2251 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2252 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2253 2254 ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr); 2255 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2256 2257 /* determine the matrix preallocation information */ 2258 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2259 2260 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 2261 for (i=xs; i<xs+nx; i++) { 2262 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2263 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2264 for (j=ys; j<ys+ny; j++) { 2265 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2266 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2267 for (k=zs; k<zs+nz; k++) { 2268 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2269 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2270 2271 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2272 2273 for (l=0; l<nc; l++) { 2274 cnt = 0; 2275 for (ii=istart; ii<iend+1; ii++) { 2276 for (jj=jstart; jj<jend+1; jj++) { 2277 for (kk=kstart; kk<kend+1; kk++) { 2278 if (ii || jj || kk) { 2279 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2280 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2281 } 2282 } else { 2283 if (dfill) { 2284 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2285 } else { 2286 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2287 } 2288 } 2289 } 2290 } 2291 } 2292 row = l + nc*(slot); 2293 maxcnt = PetscMax(maxcnt,cnt); 2294 if (removedups) { 2295 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2296 } else { 2297 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2298 } 2299 } 2300 } 2301 } 2302 } 2303 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 2304 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 2305 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2306 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2307 2308 /* 2309 For each node in the grid: we get the neighbors in the local (on processor ordering 2310 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2311 PETSc ordering. 2312 */ 2313 if (!da->prealloc_only) { 2314 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 2315 for (i=xs; i<xs+nx; i++) { 2316 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2317 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2318 for (j=ys; j<ys+ny; j++) { 2319 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2320 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2321 for (k=zs; k<zs+nz; k++) { 2322 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2323 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2324 2325 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2326 2327 for (l=0; l<nc; l++) { 2328 cnt = 0; 2329 for (ii=istart; ii<iend+1; ii++) { 2330 for (jj=jstart; jj<jend+1; jj++) { 2331 for (kk=kstart; kk<kend+1; kk++) { 2332 if (ii || jj || kk) { 2333 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2334 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2335 } 2336 } else { 2337 if (dfill) { 2338 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2339 } else { 2340 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2341 } 2342 } 2343 } 2344 } 2345 } 2346 row = l + nc*(slot); 2347 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2348 } 2349 } 2350 } 2351 } 2352 ierr = PetscFree(values);CHKERRQ(ierr); 2353 /* do not copy values to GPU since they are all zero and not yet needed there */ 2354 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2355 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2356 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2357 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2358 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2359 } 2360 ierr = PetscFree(cols);CHKERRQ(ierr); 2361 PetscFunctionReturn(0); 2362 } 2363