#include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/
#include <petscmat.h>

/* Dimension-specific coloring kernels, defined later in this file */
extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);

/*
   For ghost i that may be negative or greater than the upper bound this
   maps it into the 0:m-1 range using periodicity
*/
#define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))

/*
   Converts a dense w-by-w 0/1 fill pattern (dfill, row-major) into a compressed
   (modified CSR) representation stored in freshly allocated *rfill:
   entries [0..w] are the row starts inside the array (entry w is the end of the
   last row, i.e. the total length nz+w+1), and the remaining nz entries are the
   column indices of the nonzeros, row by row.

   If dfill is NULL, nothing is allocated and *rfill is left untouched
   (callers treat a NULL pattern as "dense block").  The caller owns *rfill.
*/
static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
{
  PetscErrorCode ierr;
  PetscInt       i,j,nz,*fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(0);

  /* count number nonzeros */
  nz = 0;
  for (i=0; i<w; i++) {
    for (j=0; j<w; j++) {
      if (dfill[w*i+j]) nz++;
    }
  }
  ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
  /* construct modified CSR storage of nonzero structure */
  /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
     so fill[1] - fill[0] gives number of nonzeros in first row etc */
  nz = w + 1;
  for (i=0; i<w; i++) {
    fill[i] = nz;
    for (j=0; j<w; j++) {
      if (dfill[w*i+j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(0);
}

/*
   Copies a fill pattern that is already in the compressed (modified CSR) format
   described above into freshly allocated storage in *rfill.

   If dfillsparse is NULL, nothing is allocated and *rfill is left untouched.
   The total length is recovered from entry w, which holds nz + w + 1.
*/
static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
{
  PetscErrorCode ierr;
  PetscInt       nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(0);

  /* Determine number of non-zeros */
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation. */
  ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
  ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Builds dd->ofillcols from dd->ofill: each component (column) that appears
   anywhere in the off-diagonal fill pattern receives its 1-based rank among
   such columns; components with no off-diagonal coupling stay 0.
   Assumes dd->ofill has already been set (callers invoke this right after
   storing the ofill pattern).
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscErrorCode ierr;
  PetscInt       i,k,cnt = 1;

  PetscFunctionBegin;

  /* ofillcount tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
  for (i=0; i<dd->w; i++) {
    for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i=0; i<dd->w; i++) {
    if (dd->ofillcols[i]) {
      dd->ofillcols[i] = cnt++;
    }
  }
  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix().

    Logically Collective on da

    Input Parameter:
+   da - the distributed array
.   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the fill pattern in the off-diagonal blocks


    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

           The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
       representing coupling and 0 entries for missing coupling. For example
$             dfill[9] = {1, 0, 0,
$                         1, 1, 0,
$                         0, 1, 1}
       means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
       itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
       diagonal block).
     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (converted to compressed form) */
  ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameter:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks


    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

           The format for dfill and ofill is a sparse representation of a
           dof-by-dof matrix with 1 entries representing coupling and 0 entries
           for missing coupling.  The sparse representation is a 1 dimensional
           array of length nz + dof + 1, where nz is the number of non-zeros in
           the matrix.  The first dof entries in the array give the
           starting array indices of each row's items in the rest of the array,
           the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
           and the remaining nz items give the column indices of each of
           the 1s within the logical 2D matrix.  Each row's items within
           the array are the column indices of the 1s within that row
           of the 2D matrix.  PETSc developers may recognize that this is the
           same format as that computed by the DMDASetBlockFills_Private()
           function from a dense 2D matrix representation.

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Philip C. Roth

.seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (already in compressed form, just copied) */
  ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
   Dispatches matrix-coloring creation to the dimension-specific kernel,
   after normalizing the coloring type and (for BAIJ formats) temporarily
   rescaling the DMDA to one degree of freedom per grid point so the coloring
   applies to blocks rather than individual matrix entries.
*/
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode ierr;
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------               |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------               |
         |             (xs,ys)     xn                          |
         |            .
|
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);

  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process local and global colorings coincide */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction with a single point per process would alias ghost points */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
  if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) {
    ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else if (dim == 2) {
    ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else if (dim == 3) {
    ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  /* undo the temporary one-dof-per-point rescaling done above for BAIJ */
  if (isBAIJ) {
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   2d coloring for AIJ: assigns color k + nc*((i mod col) + col*(j mod col)),
   i.e. a (2s+1)x(2s+1) tiling per component, where s is the stencil width.
   Colorings are cached on the DM_DA (localcoloring/ghostedcoloring).
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else {

    if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
                 by 2*stencil_width + 1 (%d)\n", m, col);
    if (by
== DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
                 by 2*stencil_width + 1 (%d)\n", n, col);
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              /* tile colors with period col in each direction, nc colors per tile cell */
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        ncolors = nc + nc*(col-1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   3d coloring for AIJ: analogous to the 2d case with a (2s+1)^3 tiling
   per component; results are cached on the DM_DA.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr =
PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
      ii = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   1d coloring for AIJ.  When DMDASetBlockFills() supplied an off-diagonal fill
   pattern (dd->ofillcols), components without off-diagonal coupling can share
   colors, reducing the color count to nc + 2*s*tc where tc is the number of
   coupled components; otherwise a simple nc*(2s+1) tiling is used.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
  col  = 2*s + 1;

  if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
                 by 2*stencil_width + 1 %d\n",(int)m,(int)col);

  ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   5-color scheme for the 2d star stencil with width 1 (5-point Laplacian-type
   stencils): color (3*j+i) mod 5 per component, giving exactly 5*nc colors.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode ierr;
  PetscInt       xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt       ncolors;
  MPI_Comm       comm;
  DMBoundaryType bx,by;
  ISColoringValue
*colors; 518 DM_DA *dd = (DM_DA*)da->data; 519 520 PetscFunctionBegin; 521 /* 522 nc - number of components per grid point 523 col - number of colors needed in one direction for single component problem 524 525 */ 526 ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr); 527 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 528 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 529 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 530 531 if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n"); 532 if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n"); 533 534 /* create the coloring */ 535 if (ctype == IS_COLORING_GLOBAL) { 536 if (!dd->localcoloring) { 537 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 538 ii = 0; 539 for (j=ys; j<ys+ny; j++) { 540 for (i=xs; i<xs+nx; i++) { 541 for (k=0; k<nc; k++) { 542 colors[ii++] = k + nc*((3*j+i) % 5); 543 } 544 } 545 } 546 ncolors = 5*nc; 547 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 548 } 549 *coloring = dd->localcoloring; 550 } else if (ctype == IS_COLORING_LOCAL) { 551 if (!dd->ghostedcoloring) { 552 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 553 ii = 0; 554 for (j=gys; j<gys+gny; j++) { 555 for (i=gxs; i<gxs+gnx; i++) { 556 for (k=0; k<nc; k++) { 557 colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); 558 } 559 } 560 } 561 ncolors = 5*nc; 562 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 563 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 564 } 565 *coloring = dd->ghostedcoloring; 566 } else 
SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 567 PetscFunctionReturn(0); 568 } 569 570 /* =========================================================================== */ 571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat); 572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat); 574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat); 576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 584 585 /*@C 586 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 587 588 Logically Collective on mat 589 590 Input Parameters: 591 + mat - the matrix 592 - da - the da 593 594 Level: intermediate 595 596 @*/ 597 PetscErrorCode MatSetupDM(Mat mat,DM da) 598 { 599 PetscErrorCode ierr; 600 601 PetscFunctionBegin; 602 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 603 PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA); 604 ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 609 { 610 DM da; 611 PetscErrorCode ierr; 612 const char *prefix; 613 Mat Anatural; 614 AO ao; 615 PetscInt rstart,rend,*petsc,i; 616 IS is; 617 MPI_Comm comm; 618 PetscViewerFormat format; 619 620 PetscFunctionBegin; 621 /* Check whether we are just printing info, in which case MatView() already viewed 
everything we wanted to view */
  ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* build the PETSc->natural permutation from the DMDA's application ordering */
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
  ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* call viewer on natural ordering */
  ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
  /* suppress header so the permuted matrix views like the original */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Loader for DMDA-generated parallel matrices: loads the matrix in natural
   ordering, then permutes it into the DMDA's (PETSc) ordering and replaces
   A's contents in place via MatHeaderReplace().
*/
PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM             da;
  PetscErrorCode ierr;
  Mat            Anatural,Aapp;
  AO             ao;
  PetscInt       rstart,rend,*app,i,m,n,M,N;
  IS             is;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
  ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
  ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);

  /* Map natural ordering to application ordering and create IS */
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
  ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* Do permutation and replace header */
  ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
  ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Creates (and preallocates) the Jacobian matrix for a DMDA, dispatching to
   the kernel matching the matrix type's preallocation interface (AIJ, BAIJ,
   SBAIJ, SELL, IS) and the DMDA's dimension.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscErrorCode ierr;
  PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat            A;
  MPI_Comm       comm;
  MatType        Atype;
  void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType        mtype;
  PetscMPIInt    size;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  ierr  = MatInitializePackage();CHKERRQ(ierr);
  mtype = da->mattype;

  /*
                                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------               |
         |               |                    |                |
      n  |           ny  |                    |                |
         |               |                    |                |
         |               .---------------------               |
         |             (xs,ys)     nx                          |
         |            .
|
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
  ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
  ierr = MatSetType(A,mtype);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  if (da->structure_only) {
    ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
  /*
     We do not provide a getmatrix function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being more
   more low-level than matrices. This is kind of cheating but, cause sometimes
   we think of DMDA has higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  /* Probe which preallocation interface the matrix actually supports,
     falling through AIJ -> BAIJ -> SBAIJ -> SELL -> IS */
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
  if (!aij) {
    ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
  }
  if (!aij) {
    ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
    if (!baij) {
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
    }
    if (!baij) {
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
      if (!sbaij) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
      }
      if (!sbaij) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
        if (!sell) {
          ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
        }
      }
      if (!sell) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
      }
    }
  } else if (baij) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
    } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
    } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
    } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
  } else {
    /* no recognized preallocation interface: just set up with the l2g mapping */
    ISLocalToGlobalMapping ltog;

    ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
    ierr = MatSetUp(A);CHKERRQ(ierr);
    ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
    ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
  }
  ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
  ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
    ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const
PetscInt[],PetscInt,const PetscInt[]); 841 842 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 843 { 844 DM_DA *da = (DM_DA*)dm->data; 845 Mat lJ; 846 ISLocalToGlobalMapping ltog; 847 IS is_loc_filt, is_glob; 848 const PetscInt *e_loc,*idx; 849 PetscInt nel,nen,nv,dof,dim,*gidx,nb; 850 PetscBool flg; 851 PetscErrorCode ierr; 852 853 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 854 We need to filter the local indices that are represented through the DMDAGetElements decomposition 855 This is because the size of the local matrices in MATIS is the local size of the l2g map */ 856 PetscFunctionBegin; 857 dof = da->w; 858 dim = dm->dim; 859 860 ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr); 861 862 /* get local elements indices in local DMDA numbering */ 863 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 864 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 865 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 866 867 /* obtain a consistent local ordering for MATIS */ 868 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 869 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 870 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 871 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 872 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 873 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 874 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 875 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 876 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 877 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 878 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 879 ierr = 
MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 880 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 881 882 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 883 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 884 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 885 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 886 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 887 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 888 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 889 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 890 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 891 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 892 ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 893 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 894 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 895 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 896 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 897 ierr = PetscFree(gidx);CHKERRQ(ierr); 898 899 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 900 flg = dm->prealloc_only; 901 dm->prealloc_only = PETSC_TRUE; 902 switch (dim) { 903 case 1: 904 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 905 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J);CHKERRQ(ierr); 906 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 907 break; 908 case 2: 909 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 910 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 912 break; 
913 case 3: 914 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 915 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 917 break; 918 default: 919 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 920 break; 921 } 922 dm->prealloc_only = flg; 923 PetscFunctionReturn(0); 924 } 925 926 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 927 { 928 PetscErrorCode ierr; 929 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 930 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 931 MPI_Comm comm; 932 PetscScalar *values; 933 DMBoundaryType bx,by; 934 ISLocalToGlobalMapping ltog; 935 DMDAStencilType st; 936 937 PetscFunctionBegin; 938 /* 939 nc - number of components per grid point 940 col - number of colors needed in one direction for single component problem 941 942 */ 943 ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 944 col = 2*s + 1; 945 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 946 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 947 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 948 949 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 950 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 951 952 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 953 /* determine the matrix preallocation information */ 954 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 955 for (i=xs; i<xs+nx; i++) { 956 957 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 958 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 959 960 for (j=ys; j<ys+ny; j++) { 961 slot = i - gxs + gnx*(j - gys); 962 963 lstart = (by == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s,-j)); 964 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 965 966 cnt = 0; 967 for (k=0; k<nc; k++) { 968 for (l=lstart; l<lend+1; l++) { 969 for (p=pstart; p<pend+1; p++) { 970 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 971 cols[cnt++] = k + nc*(slot + gnx*l + p); 972 } 973 } 974 } 975 rows[k] = k + nc*(slot); 976 } 977 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 978 } 979 } 980 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 981 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 982 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 983 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 984 985 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 986 987 /* 988 For each node in the grid: we get the neighbors in the local (on processor ordering 989 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 990 PETSc ordering. 991 */ 992 if (!da->prealloc_only) { 993 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 994 for (i=xs; i<xs+nx; i++) { 995 996 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 997 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 998 999 for (j=ys; j<ys+ny; j++) { 1000 slot = i - gxs + gnx*(j - gys); 1001 1002 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1003 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1004 1005 cnt = 0; 1006 for (k=0; k<nc; k++) { 1007 for (l=lstart; l<lend+1; l++) { 1008 for (p=pstart; p<pend+1; p++) { 1009 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1010 cols[cnt++] = k + nc*(slot + gnx*l + p); 1011 } 1012 } 1013 } 1014 rows[k] = k + nc*(slot); 1015 } 1016 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1017 } 1018 } 1019 ierr = PetscFree(values);CHKERRQ(ierr); 1020 /* do not copy values to GPU since they are all zero and not yet needed there */ 1021 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1022 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1023 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1024 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1025 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1026 } 1027 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1032 { 1033 PetscErrorCode ierr; 1034 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1035 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1036 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1037 MPI_Comm comm; 1038 PetscScalar *values; 1039 DMBoundaryType bx,by,bz; 1040 ISLocalToGlobalMapping ltog; 1041 DMDAStencilType st; 1042 1043 PetscFunctionBegin; 1044 /* 1045 nc - number of components per grid point 1046 col - number of colors needed in one direction for single component problem 1047 1048 */ 1049 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1050 col = 2*s + 1; 1051 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1052 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1053 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1054 1055 ierr = 
PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1056 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1057 1058 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1059 /* determine the matrix preallocation information */ 1060 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1061 for (i=xs; i<xs+nx; i++) { 1062 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1063 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1064 for (j=ys; j<ys+ny; j++) { 1065 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1066 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1067 for (k=zs; k<zs+nz; k++) { 1068 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1069 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1070 1071 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1072 1073 cnt = 0; 1074 for (l=0; l<nc; l++) { 1075 for (ii=istart; ii<iend+1; ii++) { 1076 for (jj=jstart; jj<jend+1; jj++) { 1077 for (kk=kstart; kk<kend+1; kk++) { 1078 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1079 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1080 } 1081 } 1082 } 1083 } 1084 rows[l] = l + nc*(slot); 1085 } 1086 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1087 } 1088 } 1089 } 1090 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1091 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1092 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1093 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1094 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1095 1096 /* 1097 For each node in the grid: we get the neighbors in the local (on processor ordering 1098 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1099 PETSc ordering. 
1100 */ 1101 if (!da->prealloc_only) { 1102 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1103 for (i=xs; i<xs+nx; i++) { 1104 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1105 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1106 for (j=ys; j<ys+ny; j++) { 1107 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1108 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1109 for (k=zs; k<zs+nz; k++) { 1110 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1111 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1112 1113 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1114 1115 cnt = 0; 1116 for (l=0; l<nc; l++) { 1117 for (ii=istart; ii<iend+1; ii++) { 1118 for (jj=jstart; jj<jend+1; jj++) { 1119 for (kk=kstart; kk<kend+1; kk++) { 1120 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1121 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1122 } 1123 } 1124 } 1125 } 1126 rows[l] = l + nc*(slot); 1127 } 1128 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1129 } 1130 } 1131 } 1132 ierr = PetscFree(values);CHKERRQ(ierr); 1133 /* do not copy values to GPU since they are all zero and not yet needed there */ 1134 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1135 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1136 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1137 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1138 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1139 } 1140 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J) 1145 { 1146 PetscErrorCode ierr; 1147 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1148 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1149 MPI_Comm 
comm; 1150 PetscScalar *values; 1151 DMBoundaryType bx,by; 1152 ISLocalToGlobalMapping ltog,mltog; 1153 DMDAStencilType st; 1154 PetscBool removedups = PETSC_FALSE; 1155 1156 PetscFunctionBegin; 1157 /* 1158 nc - number of components per grid point 1159 col - number of colors needed in one direction for single component problem 1160 1161 */ 1162 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 1163 col = 2*s + 1; 1164 /* 1165 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1166 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1167 */ 1168 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1169 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1170 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 1171 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 1172 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1173 1174 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1175 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1176 1177 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1178 /* determine the matrix preallocation information */ 1179 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1180 for (i=xs; i<xs+nx; i++) { 1181 1182 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1183 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1184 1185 for (j=ys; j<ys+ny; j++) { 1186 slot = i - gxs + gnx*(j - gys); 1187 1188 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1189 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1190 1191 cnt = 0; 1192 for (k=0; k<nc; k++) { 1193 for (l=lstart; l<lend+1; l++) { 1194 for (p=pstart; p<pend+1; p++) { 1195 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1196 cols[cnt++] = k + nc*(slot + gnx*l + p); 1197 } 1198 } 1199 } 1200 rows[k] = k + nc*(slot); 1201 } 1202 if (removedups) { 1203 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1204 } else { 1205 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1206 } 1207 } 1208 } 1209 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1210 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1211 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1212 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1213 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1214 if (!mltog) { 1215 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1216 } 1217 1218 /* 1219 For each node in the grid: we get the neighbors in the local (on processor ordering 1220 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1221 PETSc ordering. 1222 */ 1223 if (!da->prealloc_only) { 1224 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1225 for (i=xs; i<xs+nx; i++) { 1226 1227 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1228 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1229 1230 for (j=ys; j<ys+ny; j++) { 1231 slot = i - gxs + gnx*(j - gys); 1232 1233 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1234 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1235 1236 cnt = 0; 1237 for (k=0; k<nc; k++) { 1238 for (l=lstart; l<lend+1; l++) { 1239 for (p=pstart; p<pend+1; p++) { 1240 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1241 cols[cnt++] = k + nc*(slot + gnx*l + p); 1242 } 1243 } 1244 } 1245 rows[k] = k + nc*(slot); 1246 } 1247 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1248 } 1249 } 1250 ierr = PetscFree(values);CHKERRQ(ierr); 1251 /* do not copy values to GPU since they are all zero and not yet needed there */ 1252 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1253 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1254 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1255 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1256 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1257 } 1258 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1263 { 1264 PetscErrorCode ierr; 1265 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1266 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1267 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1268 DM_DA *dd = (DM_DA*)da->data; 1269 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1270 MPI_Comm comm; 1271 PetscScalar *values; 1272 DMBoundaryType bx,by; 1273 ISLocalToGlobalMapping ltog; 1274 DMDAStencilType st; 1275 PetscBool removedups = PETSC_FALSE; 1276 1277 PetscFunctionBegin; 1278 /* 1279 nc - number of components per grid point 1280 col - number of colors needed in one direction for single component problem 1281 1282 */ 1283 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 1284 col = 2*s + 1; 1285 /* 1286 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1287 because of "wrapping" around the end of the 
domain hitting an entry already counted in the other direction. 1288 */ 1289 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1290 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1291 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 1292 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 1293 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1294 1295 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1296 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1297 1298 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1299 /* determine the matrix preallocation information */ 1300 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1301 for (i=xs; i<xs+nx; i++) { 1302 1303 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1304 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1305 1306 for (j=ys; j<ys+ny; j++) { 1307 slot = i - gxs + gnx*(j - gys); 1308 1309 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1310 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1311 1312 for (k=0; k<nc; k++) { 1313 cnt = 0; 1314 for (l=lstart; l<lend+1; l++) { 1315 for (p=pstart; p<pend+1; p++) { 1316 if (l || p) { 1317 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1318 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1319 } 1320 } else { 1321 if (dfill) { 1322 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1323 } else { 1324 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1325 } 1326 } 1327 } 1328 } 1329 row = k + nc*(slot); 1330 maxcnt = PetscMax(maxcnt,cnt); 1331 if (removedups) { 1332 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1333 } else { 1334 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1335 } 1336 } 1337 } 1338 } 1339 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1340 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1341 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1342 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1343 1344 /* 1345 For each node in the grid: we get the neighbors in the local (on processor ordering 1346 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1347 PETSc ordering. 1348 */ 1349 if (!da->prealloc_only) { 1350 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 1351 for (i=xs; i<xs+nx; i++) { 1352 1353 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1354 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1355 1356 for (j=ys; j<ys+ny; j++) { 1357 slot = i - gxs + gnx*(j - gys); 1358 1359 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1360 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1361 1362 for (k=0; k<nc; k++) { 1363 cnt = 0; 1364 for (l=lstart; l<lend+1; l++) { 1365 for (p=pstart; p<pend+1; p++) { 1366 if (l || p) { 1367 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1368 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1369 } 1370 } else { 1371 if (dfill) { 1372 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1373 } else { 1374 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1375 } 1376 } 1377 } 1378 } 1379 row = k + nc*(slot); 1380 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1381 } 1382 } 1383 } 1384 ierr = PetscFree(values);CHKERRQ(ierr); 1385 /* do not copy values to GPU since they are all zero and not yet needed there */ 1386 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1387 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1388 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1390 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1391 } 1392 ierr = PetscFree(cols);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 /* ---------------------------------------------------------------------------------*/ 1397 1398 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J) 1399 { 1400 PetscErrorCode ierr; 1401 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1402 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1403 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1404 MPI_Comm comm; 1405 PetscScalar *values; 1406 DMBoundaryType bx,by,bz; 1407 ISLocalToGlobalMapping ltog,mltog; 1408 DMDAStencilType st; 1409 PetscBool removedups = PETSC_FALSE; 1410 1411 PetscFunctionBegin; 1412 /* 1413 nc - number of components 
per grid point 1414 col - number of colors needed in one direction for single component problem 1415 1416 */ 1417 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1418 col = 2*s + 1; 1419 1420 /* 1421 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1422 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1423 */ 1424 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1425 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1426 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1427 1428 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1429 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1430 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1431 1432 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1433 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1434 1435 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1436 /* determine the matrix preallocation information */ 1437 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1438 for (i=xs; i<xs+nx; i++) { 1439 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1440 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1441 for (j=ys; j<ys+ny; j++) { 1442 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1443 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1444 for (k=zs; k<zs+nz; k++) { 1445 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1446 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 1447 1448 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1449 1450 cnt = 0; 1451 for (l=0; l<nc; l++) { 1452 for (ii=istart; ii<iend+1; ii++) { 1453 for (jj=jstart; jj<jend+1; jj++) { 1454 for (kk=kstart; kk<kend+1; kk++) { 1455 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1456 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1457 } 1458 } 1459 } 1460 } 1461 rows[l] = l + nc*(slot); 1462 } 1463 if (removedups) { 1464 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1465 } else { 1466 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1467 } 1468 } 1469 } 1470 } 1471 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1472 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1473 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1474 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1475 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1476 if (!mltog) { 1477 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1478 } 1479 1480 /* 1481 For each node in the grid: we get the neighbors in the local (on processor ordering 1482 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1483 PETSc ordering. 1484 */ 1485 if (!da->prealloc_only) { 1486 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1487 for (i=xs; i<xs+nx; i++) { 1488 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1489 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1490 for (j=ys; j<ys+ny; j++) { 1491 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1492 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1493 for (k=zs; k<zs+nz; k++) { 1494 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1495 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 1496 1497 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1498 1499 cnt = 0; 1500 for (l=0; l<nc; l++) { 1501 for (ii=istart; ii<iend+1; ii++) { 1502 for (jj=jstart; jj<jend+1; jj++) { 1503 for (kk=kstart; kk<kend+1; kk++) { 1504 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1505 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1506 } 1507 } 1508 } 1509 } 1510 rows[l] = l + nc*(slot); 1511 } 1512 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1513 } 1514 } 1515 } 1516 ierr = PetscFree(values);CHKERRQ(ierr); 1517 /* do not copy values to GPU since they are all zero and not yet needed there */ 1518 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1519 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1520 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1521 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1522 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1523 } 1524 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 /* ---------------------------------------------------------------------------------*/ 1529 1530 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1531 { 1532 PetscErrorCode ierr; 1533 DM_DA *dd = (DM_DA*)da->data; 1534 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1535 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1536 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1537 PetscScalar *values; 1538 DMBoundaryType bx; 1539 ISLocalToGlobalMapping ltog; 1540 PetscMPIInt rank,size; 1541 1542 PetscFunctionBegin; 1543 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr); 1544 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr); 1545 1546 /* 1547 nc - number of components per grid point 1548 1549 */ 1550 ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr); 1551 if (s > 1) 
SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1552 ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr); 1553 ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr); 1554 1555 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1556 ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr); 1557 1558 /* 1559 note should be smaller for first and last process with no periodic 1560 does not handle dfill 1561 */ 1562 cnt = 0; 1563 /* coupling with process to the left */ 1564 for (i=0; i<s; i++) { 1565 for (j=0; j<nc; j++) { 1566 ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1567 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1568 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1569 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1570 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1571 } 1572 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1573 cnt++; 1574 } 1575 } 1576 for (i=s; i<nx-s; i++) { 1577 for (j=0; j<nc; j++) { 1578 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1579 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1580 cnt++; 1581 } 1582 } 1583 /* coupling with process to the right */ 1584 for (i=nx-s; i<nx; i++) { 1585 for (j=0; j<nc; j++) { 1586 ocols[cnt] = ((rank == (size-1)) ? 
0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1587 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1588 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1589 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1590 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1591 } 1592 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1593 cnt++; 1594 } 1595 } 1596 1597 ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr); 1598 ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr); 1599 ierr = PetscFree2(cols,ocols);CHKERRQ(ierr); 1600 1601 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1602 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1603 1604 /* 1605 For each node in the grid: we get the neighbors in the local (on processor ordering 1606 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1607 PETSc ordering. 1608 */ 1609 if (!da->prealloc_only) { 1610 ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr); 1611 1612 row = xs*nc; 1613 /* coupling with process to the left */ 1614 for (i=xs; i<xs+s; i++) { 1615 for (j=0; j<nc; j++) { 1616 cnt = 0; 1617 if (rank) { 1618 for (l=0; l<s; l++) { 1619 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1620 } 1621 } 1622 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1623 for (l=0; l<s; l++) { 1624 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1625 } 1626 } 1627 if (dfill) { 1628 for (k=dfill[j]; k<dfill[j+1]; k++) { 1629 cols[cnt++] = i*nc + dfill[k]; 1630 } 1631 } else { 1632 for (k=0; k<nc; k++) { 1633 cols[cnt++] = i*nc + k; 1634 } 1635 } 1636 for (l=0; l<s; l++) { 1637 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1638 } 1639 ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1640 row++; 1641 } 1642 } 1643 for (i=xs+s; i<xs+nx-s; i++) { 1644 for (j=0; j<nc; 
j++) { 1645 cnt = 0; 1646 for (l=0; l<s; l++) { 1647 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1648 } 1649 if (dfill) { 1650 for (k=dfill[j]; k<dfill[j+1]; k++) { 1651 cols[cnt++] = i*nc + dfill[k]; 1652 } 1653 } else { 1654 for (k=0; k<nc; k++) { 1655 cols[cnt++] = i*nc + k; 1656 } 1657 } 1658 for (l=0; l<s; l++) { 1659 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1660 } 1661 ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1662 row++; 1663 } 1664 } 1665 /* coupling with process to the right */ 1666 for (i=xs+nx-s; i<xs+nx; i++) { 1667 for (j=0; j<nc; j++) { 1668 cnt = 0; 1669 for (l=0; l<s; l++) { 1670 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1671 } 1672 if (dfill) { 1673 for (k=dfill[j]; k<dfill[j+1]; k++) { 1674 cols[cnt++] = i*nc + dfill[k]; 1675 } 1676 } else { 1677 for (k=0; k<nc; k++) { 1678 cols[cnt++] = i*nc + k; 1679 } 1680 } 1681 if (rank < size-1) { 1682 for (l=0; l<s; l++) { 1683 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1684 } 1685 } 1686 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1687 for (l=0; l<s; l++) { 1688 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1689 } 1690 } 1691 ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1692 row++; 1693 } 1694 } 1695 ierr = PetscFree2(values,cols);CHKERRQ(ierr); 1696 /* do not copy values to GPU since they are all zero and not yet needed there */ 1697 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1698 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1701 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1702 } 1703 PetscFunctionReturn(0); 1704 } 1705 1706 /* 
 ---------------------------------------------------------------------------------*/

/*
   Preallocates (and, unless da->prealloc_only, inserts explicit zeros into) the AIJ
   matrix for a 1d DMDA.  Assumes full coupling: each of the nc components at a grid
   point couples to all nc components of all 2*s+1 stencil neighbors, giving col*nc
   nonzeros per row in both the diagonal and off-diagonal parts.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  PetscScalar            *values;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* dense-coupling upper bound: col*nc entries per row for both on- and off-process parts */
  ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  /* only install the DMDA's mapping if the matrix does not already carry one */
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
    ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the ghosted local region */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (l=0; l<nc; l++) {
        for (i1=istart; i1<iend+1; i1++) {
          cols[cnt++] = l + nc*(slot + i1);
        }
        rows[l] = l + nc*(slot);
      }
      ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Preallocates (and, unless da->prealloc_only, zero-fills) the Jacobian for a 2d DMDA
   stored in block AIJ (BAIJ) format with block size nc; the block nonzero pattern is
   computed exactly with the MatPreallocate*() helpers.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr =
PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* same traversal as above, now inserting explicit zeros to lock in the nonzero pattern */
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);
        cnt    = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   3d BAIJ analogue of DMCreateMatrix_DA_2d_MPIBAIJ(): exact block preallocation
   followed by insertion of explicit zeros for a 3d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* cols holds block columns only, hence col^3 entries (no nc^2 factor needed) */
  ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* same traversal as above, inserting explicit zeros to lock in the nonzero pattern */
    ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
    identify in the local ordering with periodic domain.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
{
  PetscErrorCode ierr;
  PetscInt       i,n;

  PetscFunctionBegin;
  /* map the block row and candidate block columns from local to global block numbering (in place) */
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
  /* compact col[] in place, keeping only upper-triangular entries (col >= row) */
  for (i=0,n=0; i<*cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(0);
}

/*
   Preallocates (and, unless da->prealloc_only, zero-fills) the Jacobian for a 2d DMDA
   in symmetric block AIJ (SBAIJ) format.  Only the upper triangular part of each block
   row is stored, so candidate columns are filtered through L2GFilterUpperTriangular().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,n-j-1));
      slot = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      /* drop lower-triangular entries; slot/cols become global block indices */
      ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
      ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* same traversal as above, inserting explicit zeros to lock in the nonzero pattern */
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,n-j-1));
        slot = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
        /* slot/cols are global block indices after the filter, so use MatSetValuesBlocked() (not the Local variant) */
        ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   3d SBAIJ analogue of DMCreateMatrix_DA_2d_MPISBAIJ().
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the matrix */
  ierr =
PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        /* keep only upper-triangular global block columns for the symmetric format */
        ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
        ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
*/
  if (!da->prealloc_only) {
    /* same traversal as the preallocation pass, inserting explicit zeros */
    ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
          /* slot/cols are global block indices after the filter, so use MatSetValuesBlocked() */
          ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   Like the plain 3d AIJ creation routine but honors the per-block fill patterns
   (dd->dfill for the diagonal block, dd->ofill for off-diagonal blocks) set with
   DMDASetBlockFills(), so only coupled components are preallocated and filled.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,
maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  /* coloring requires the periodic grid size to be a multiple of the color count */
  if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* one scalar row per component l; ofill/dfill are in the sparse CSR-like format built by DMDASetBlockFills_Private() */
        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  /* diagonal block: use dfill if given, otherwise couple all nc components */
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row    = l + nc*(slot);
          /* track the widest row so the zero-fill pass can size its values buffer */
          maxcnt = PetscMax(maxcnt,cnt);
          if (removedups) {
            ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
          } else {
            ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* maxcnt zeros suffice since each MatSetValuesLocal() call below passes at most maxcnt columns */
    ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row  = l + nc*(slot);
            ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}