1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 17 { 18 PetscErrorCode ierr; 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 53 { 54 PetscErrorCode ierr; 55 PetscInt nz; 56 57 PetscFunctionBegin; 58 if (!dfillsparse) PetscFunctionReturn(0); 59 60 /* Determine number of non-zeros */ 61 nz = (dfillsparse[w] - w - 1); 62 63 /* Allocate space for our copy of the given sparse matrix representation. */ 64 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 65 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 70 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 71 { 72 PetscErrorCode ierr; 73 PetscInt i,k,cnt = 1; 74 75 PetscFunctionBegin; 76 77 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 78 columns to the left with any nonzeros in them plus 1 */ 79 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 80 for (i=0; i<dd->w; i++) { 81 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 82 } 83 for (i=0; i<dd->w; i++) { 84 if (dd->ofillcols[i]) { 85 dd->ofillcols[i] = cnt++; 86 } 87 } 88 PetscFunctionReturn(0); 89 } 90 91 92 93 /*@ 94 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 95 of the matrix returned by DMCreateMatrix(). 96 97 Logically Collective on da 98 99 Input Parameter: 100 + da - the distributed array 101 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 102 - ofill - the fill pattern in the off-diagonal blocks 103 104 105 Level: developer 106 107 Notes: 108 This only makes sense when you are doing multicomponent problems but using the 109 MPIAIJ matrix format 110 111 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 112 representing coupling and 0 entries for missing coupling. For example 113 $ dfill[9] = {1, 0, 0, 114 $ 1, 1, 0, 115 $ 0, 1, 1} 116 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 117 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 118 diagonal block). 119 120 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 121 can be represented in the dfill, ofill format 122 123 Contributed by Glenn Hammond 124 125 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 126 127 @*/ 128 PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill) 129 { 130 DM_DA *dd = (DM_DA*)da->data; 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 /* save the given dfill and ofill information */ 135 ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr); 136 ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr); 137 138 /* count nonzeros in ofill columns */ 139 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 140 141 PetscFunctionReturn(0); 142 } 143 144 145 /*@ 146 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 147 of the matrix returned by DMCreateMatrix(), using sparse representations 148 of fill patterns. 149 150 Logically Collective on da 151 152 Input Parameter: 153 + da - the distributed array 154 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 155 - ofill - the sparse fill pattern in the off-diagonal blocks 156 157 158 Level: developer 159 160 Notes: This only makes sense when you are doing multicomponent problems but using the 161 MPIAIJ matrix format 162 163 The format for dfill and ofill is a sparse representation of a 164 dof-by-dof matrix with 1 entries representing coupling and 0 entries 165 for missing coupling. The sparse representation is a 1 dimensional 166 array of length nz + dof + 1, where nz is the number of non-zeros in 167 the matrix. The first dof entries in the array give the 168 starting array indices of each row's items in the rest of the array, 169 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 170 and the remaining nz items give the column indices of each of 171 the 1s within the logical 2D matrix. Each row's items within 172 the array are the column indices of the 1s within that row 173 of the 2D matrix. PETSc developers may recognize that this is the 174 same format as that computed by the DMDASetBlockFills_Private() 175 function from a dense 2D matrix representation. 176 177 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 178 can be represented in the dfill, ofill format 179 180 Contributed by Philip C. Roth 181 182 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 183 184 @*/ 185 PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse) 186 { 187 DM_DA *dd = (DM_DA*)da->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 /* save the given dfill and ofill information */ 192 ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr); 193 ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr); 194 195 /* count nonzeros in ofill columns */ 196 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 197 198 PetscFunctionReturn(0); 199 } 200 201 202 PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring) 203 { 204 PetscErrorCode ierr; 205 PetscInt dim,m,n,p,nc; 206 DMBoundaryType bx,by,bz; 207 MPI_Comm comm; 208 PetscMPIInt size; 209 PetscBool isBAIJ; 210 DM_DA *dd = (DM_DA*)da->data; 211 212 PetscFunctionBegin; 213 /* 214 m 215 ------------------------------------------------------ 216 | | 217 | | 218 | ---------------------- | 219 | | | | 220 n | yn | | | 221 | | | | 222 | .--------------------- | 223 | (xs,ys) xn | 224 | . | 225 | (gxs,gys) | 226 | | 227 ----------------------------------------------------- 228 */ 229 230 /* 231 nc - number of components per grid point 232 col - number of colors needed in one direction for single component problem 233 234 */ 235 ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr); 236 237 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 238 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 239 if (ctype == IS_COLORING_LOCAL) { 240 if (size == 1) { 241 ctype = IS_COLORING_GLOBAL; 242 } else if (dim > 1) { 243 if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) { 244 SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 245 } 246 } 247 } 248 249 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 250 matrices is for the blocks, not the individual matrix elements */ 251 ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr); 252 if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);} 253 if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);} 254 if (isBAIJ) { 255 dd->w = 1; 256 dd->xs = dd->xs/nc; 257 dd->xe = dd->xe/nc; 258 dd->Xs = dd->Xs/nc; 259 dd->Xe = dd->Xe/nc; 260 } 261 262 /* 263 We do not provide a getcoloring function in the DMDA operations because 264 the basic DMDA does not know about matrices. We think of DMDA as being more 265 more low-level then matrices. 266 */ 267 if (dim == 1) { 268 ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 269 } else if (dim == 2) { 270 ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 271 } else if (dim == 3) { 272 ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 273 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); 274 if (isBAIJ) { 275 dd->w = nc; 276 dd->xs = dd->xs*nc; 277 dd->xe = dd->xe*nc; 278 dd->Xs = dd->Xs*nc; 279 dd->Xe = dd->Xe*nc; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 /* ---------------------------------------------------------------------------------*/ 285 286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 287 { 288 PetscErrorCode ierr; 289 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col; 290 PetscInt ncolors; 291 MPI_Comm comm; 292 DMBoundaryType bx,by; 293 DMDAStencilType st; 294 ISColoringValue *colors; 295 DM_DA *dd = (DM_DA*)da->data; 296 297 PetscFunctionBegin; 298 /* 299 nc - number of components per grid point 300 col - number of colors needed in one direction for single component problem 301 302 */ 303 ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 304 col = 2*s + 1; 305 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 306 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 307 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 308 309 /* special case as taught to us by Paul Hovland */ 310 if (st == DMDA_STENCIL_STAR && s == 1) { 311 ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 312 } else { 313 314 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\ 315 by 2*stencil_width + 1 (%d)\n", m, col); 316 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\ 317 by 2*stencil_width + 1 (%d)\n", n, col); 318 if (ctype == IS_COLORING_GLOBAL) { 319 if (!dd->localcoloring) { 320 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 321 ii = 0; 322 for (j=ys; j<ys+ny; j++) { 323 for (i=xs; i<xs+nx; i++) { 324 for (k=0; k<nc; k++) { 325 colors[ii++] = k + nc*((i % col) + col*(j % col)); 326 } 327 } 328 } 329 ncolors = nc + nc*(col-1 + col*(col-1)); 330 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 331 } 332 *coloring = dd->localcoloring; 333 } else if (ctype == IS_COLORING_LOCAL) { 334 if (!dd->ghostedcoloring) { 335 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 336 ii = 0; 337 for (j=gys; j<gys+gny; j++) { 338 for (i=gxs; i<gxs+gnx; i++) { 339 for (k=0; k<nc; k++) { 340 /* the complicated stuff is to handle periodic boundaries */ 341 colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col)); 342 } 343 } 344 } 345 ncolors = nc + nc*(col - 1 + col*(col-1)); 346 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 347 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 348 349 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 350 } 351 *coloring = dd->ghostedcoloring; 352 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 353 } 354 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 /* ---------------------------------------------------------------------------------*/ 359 360 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 361 { 362 PetscErrorCode ierr; 363 PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P; 364 PetscInt ncolors; 365 MPI_Comm comm; 366 DMBoundaryType bx,by,bz; 367 DMDAStencilType st; 368 ISColoringValue *colors; 369 DM_DA *dd = (DM_DA*)da->data; 370 371 PetscFunctionBegin; 372 /* 373 nc - number of components per grid point 374 col - number of colors needed in one direction for single component problem 375 376 */ 377 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 378 col = 2*s + 1; 379 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 380 by 2*stencil_width + 1\n"); 381 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 382 by 2*stencil_width + 1\n"); 383 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 384 by 2*stencil_width + 1\n"); 385 386 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 387 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 388 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 389 390 /* create the coloring */ 391 if (ctype == IS_COLORING_GLOBAL) { 392 if (!dd->localcoloring) { 393 ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr); 394 ii = 0; 395 for (k=zs; k<zs+nz; k++) { 396 for (j=ys; j<ys+ny; j++) { 397 for (i=xs; i<xs+nx; i++) { 398 for (l=0; l<nc; l++) { 399 colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col)); 400 } 401 } 402 } 403 } 404 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 405 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 406 } 407 *coloring = dd->localcoloring; 408 } else if (ctype == IS_COLORING_LOCAL) { 409 if (!dd->ghostedcoloring) { 410 ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr); 411 ii = 0; 412 for (k=gzs; k<gzs+gnz; k++) { 413 for (j=gys; j<gys+gny; j++) { 414 for (i=gxs; i<gxs+gnx; i++) { 415 for (l=0; l<nc; l++) { 416 /* the complicated stuff is to handle periodic boundaries */ 417 colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col)); 418 } 419 } 420 } 421 } 422 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 423 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 424 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 425 } 426 *coloring = dd->ghostedcoloring; 427 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 428 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 /* ---------------------------------------------------------------------------------*/ 433 434 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 435 { 436 PetscErrorCode ierr; 437 PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col; 438 PetscInt ncolors; 439 MPI_Comm comm; 440 DMBoundaryType bx; 441 ISColoringValue *colors; 442 DM_DA *dd = (DM_DA*)da->data; 443 444 PetscFunctionBegin; 445 /* 446 nc - number of components per grid point 447 col - number of colors needed in one direction for single component problem 448 449 */ 450 ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr); 451 col = 2*s + 1; 452 453 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\ 454 by 2*stencil_width + 1 %d\n",(int)m,(int)col); 455 456 ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr); 457 ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr); 458 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 459 460 /* create the coloring */ 461 if (ctype == IS_COLORING_GLOBAL) { 462 if (!dd->localcoloring) { 463 ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr); 464 if (dd->ofillcols) { 465 PetscInt tc = 0; 466 for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0); 467 i1 = 0; 468 for (i=xs; i<xs+nx; i++) { 469 for (l=0; l<nc; l++) { 470 if (dd->ofillcols[l] && (i % col)) { 471 colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; 472 } else { 473 colors[i1++] = l; 474 } 475 } 476 } 477 ncolors = nc + 2*s*tc; 478 } else { 479 i1 = 0; 480 for (i=xs; i<xs+nx; i++) { 481 for (l=0; l<nc; l++) { 482 colors[i1++] = l + nc*(i % col); 483 } 484 } 485 ncolors = nc + nc*(col-1); 486 } 487 ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 488 } 489 *coloring = dd->localcoloring; 490 } else if (ctype == IS_COLORING_LOCAL) { 491 if (!dd->ghostedcoloring) { 492 ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr); 493 i1 = 0; 494 for (i=gxs; i<gxs+gnx; i++) { 495 for (l=0; l<nc; l++) { 496 /* the complicated stuff is to handle periodic boundaries */ 497 colors[i1++] = l + nc*(SetInRange(i,m) % col); 498 } 499 } 500 ncolors = nc + nc*(col-1); 501 ierr = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 502 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 503 } 504 *coloring = dd->ghostedcoloring; 505 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 506 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 511 { 512 PetscErrorCode ierr; 513 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc; 514 PetscInt ncolors; 515 MPI_Comm comm; 516 DMBoundaryType bx,by; 517 ISColoringValue *colors; 518 DM_DA *dd = (DM_DA*)da->data; 519 520 PetscFunctionBegin; 521 /* 522 nc - number of components per grid point 523 col - number of colors needed in one direction for single component problem 524 525 */ 526 ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr); 527 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 528 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 529 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 530 531 if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n"); 532 if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n"); 533 534 /* create the coloring */ 535 if (ctype == IS_COLORING_GLOBAL) { 536 if (!dd->localcoloring) { 537 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 538 ii = 0; 539 for (j=ys; j<ys+ny; j++) { 540 for (i=xs; i<xs+nx; i++) { 541 for (k=0; k<nc; k++) { 542 colors[ii++] = k + nc*((3*j+i) % 5); 543 } 544 } 545 } 546 ncolors = 5*nc; 547 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 548 } 549 *coloring = dd->localcoloring; 550 } else if (ctype == IS_COLORING_LOCAL) { 551 if (!dd->ghostedcoloring) { 552 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 553 ii = 0; 554 for (j=gys; j<gys+gny; j++) { 555 for (i=gxs; i<gxs+gnx; i++) { 556 for (k=0; k<nc; k++) { 557 colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); 558 } 559 } 560 } 561 ncolors = 5*nc; 562 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 563 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 564 } 565 *coloring = dd->ghostedcoloring; 566 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 567 PetscFunctionReturn(0); 568 } 569 570 /* =========================================================================== */ 571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool); 572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool); 574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool); 576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 584 585 /*@C 586 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 587 588 Logically Collective on mat 589 590 Input Parameters: 591 + mat - the matrix 592 - da - the da 593 594 Level: intermediate 595 596 @*/ 597 PetscErrorCode MatSetupDM(Mat mat,DM da) 598 { 599 PetscErrorCode ierr; 600 601 PetscFunctionBegin; 602 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 603 PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA); 604 ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 609 { 610 DM da; 611 PetscErrorCode ierr; 612 const char *prefix; 613 Mat Anatural; 614 AO ao; 615 PetscInt rstart,rend,*petsc,i; 616 IS is; 617 MPI_Comm comm; 618 PetscViewerFormat format; 619 620 PetscFunctionBegin; 621 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 622 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 623 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 624 625 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 626 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 627 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 628 629 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 630 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 631 ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr); 632 for (i=rstart; i<rend; i++) petsc[i-rstart] = i; 633 ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr); 634 ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 635 636 /* call viewer on natural ordering */ 637 ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr); 638 ierr = ISDestroy(&is);CHKERRQ(ierr); 639 ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr); 640 ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr); 641 ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr); 642 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 643 ierr = MatView(Anatural,viewer);CHKERRQ(ierr); 644 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 645 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 650 { 651 DM da; 652 PetscErrorCode ierr; 653 Mat Anatural,Aapp; 654 AO ao; 655 PetscInt rstart,rend,*app,i,m,n,M,N; 656 IS is; 657 MPI_Comm comm; 658 659 PetscFunctionBegin; 660 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 661 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 662 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 663 664 /* Load the matrix in natural ordering */ 665 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr); 666 ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr); 667 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 668 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 669 ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr); 670 ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr); 671 672 /* Map natural ordering to application ordering and create IS */ 673 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 674 ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr); 675 ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr); 676 for (i=rstart; i<rend; i++) app[i-rstart] = i; 677 ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr); 678 ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 679 680 /* Do permutation and replace header */ 681 ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr); 682 ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr); 683 ierr = ISDestroy(&is);CHKERRQ(ierr); 684 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 689 { 690 PetscErrorCode ierr; 691 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 692 Mat A; 693 MPI_Comm comm; 694 MatType Atype; 695 PetscSection section, sectionGlobal; 696 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 697 MatType mtype; 698 PetscMPIInt size; 699 DM_DA *dd = (DM_DA*)da->data; 700 701 PetscFunctionBegin; 702 ierr = MatInitializePackage();CHKERRQ(ierr); 703 mtype = da->mattype; 704 705 ierr = DMGetSection(da, §ion);CHKERRQ(ierr); 706 if (section) { 707 PetscInt bs = -1; 708 PetscInt localSize; 709 PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric; 710 711 ierr = DMGetGlobalSection(da, §ionGlobal);CHKERRQ(ierr); 712 ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr); 713 ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr); 714 ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 715 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 716 ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr); 717 ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr); 718 ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr); 719 ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr); 720 ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr); 721 ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr); 722 ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr); 723 /* Check for symmetric storage */ 724 isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock); 725 if (isSymmetric) { 726 ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr); 727 } 728 if (!isShell) { 729 PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal; 730 731 if (bs < 0) { 732 if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) { 733 PetscInt pStart, pEnd, p, dof; 734 735 ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr); 736 for (p = pStart; p < pEnd; ++p) { 737 ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr); 738 if (dof) { 739 bs = dof; 740 break; 741 } 742 } 743 } else { 744 bs = 1; 745 } 746 /* Must have same blocksize on all procs (some might have no points) */ 747 bsLocal = bs; 748 ierr = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr); 749 } 750 ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr); 751 /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */ 752 ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr); 753 } 754 } 755 /* 756 m 757 ------------------------------------------------------ 758 | | 759 | | 760 | ---------------------- | 761 | | | | 762 n | ny | | | 763 | | | | 764 | .--------------------- | 765 | (xs,ys) nx | 766 | . | 767 | (gxs,gys) | 768 | | 769 ----------------------------------------------------- 770 */ 771 772 /* 773 nc - number of components per grid point 774 col - number of colors needed in one direction for single component problem 775 776 */ 777 M = dd->M; 778 N = dd->N; 779 P = dd->P; 780 dim = da->dim; 781 dof = dd->w; 782 /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */ 783 ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr); 784 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 785 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 786 ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr); 787 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 788 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 789 ierr = MatSetDM(A,da);CHKERRQ(ierr); 790 if (da->structure_only) { 791 ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr); 792 } 793 ierr = MatGetType(A,&Atype);CHKERRQ(ierr); 794 /* 795 We do not provide a getmatrix function in the DMDA operations because 796 the basic DMDA does not know about matrices. We think of DMDA as being more 797 more low-level than matrices. This is kind of cheating but, cause sometimes 798 we think of DMDA has higher level than matrices. 799 800 We could switch based on Atype (or mtype), but we do not since the 801 specialized setting routines depend only on the particular preallocation 802 details of the matrix, not the type itself. 803 */ 804 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 805 if (!aij) { 806 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 807 } 808 if (!aij) { 809 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 810 if (!baij) { 811 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 812 } 813 if (!baij) { 814 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 815 if (!sbaij) { 816 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 817 } 818 if (!sbaij) { 819 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr); 820 if (!sell) { 821 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr); 822 } 823 } 824 if (!sell) { 825 ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr); 826 } 827 } 828 } 829 if (aij) { 830 if (dim == 1) { 831 if (dd->ofill) { 832 ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 833 } else { 834 ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 835 } 836 } else if (dim == 2) { 837 if (dd->ofill) { 838 ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 839 } else { 840 ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 841 } 842 } else if (dim == 3) { 843 if (dd->ofill) { 844 ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 845 } else { 846 ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 847 } 848 } 849 } else if (baij) { 850 if (dim == 2) { 851 ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr); 852 } else if (dim == 3) { 853 ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr); 854 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 855 } else if (sbaij) { 856 if (dim == 2) { 857 ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr); 858 } else if (dim == 3) { 859 ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr); 860 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 861 } else if (sell) { 862 if (dim == 2) { 863 ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr); 864 } else if (dim == 3) { 865 ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr); 866 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 867 } else if (is) { 868 ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr); 869 } else { 870 ISLocalToGlobalMapping ltog; 871 872 ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr); 873 ierr = MatSetUp(A);CHKERRQ(ierr); 874 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 875 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr); 876 } 877 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr); 878 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr); 879 ierr = MatSetDM(A,da);CHKERRQ(ierr); 880 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 881 if (size > 1) { 882 /* change viewer to display matrix in natural ordering */ 883 ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr); 884 ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr); 885 } 886 *J = A; 887 PetscFunctionReturn(0); 888 } 889 890 /* ---------------------------------------------------------------------------------*/ 891 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 892 893 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 894 { 895 DM_DA *da = (DM_DA*)dm->data; 896 Mat lJ; 897 ISLocalToGlobalMapping ltog; 898 IS is_loc_filt, is_glob; 899 const PetscInt *e_loc,*idx; 900 PetscInt nel,nen,nv,dof,dim,*gidx,nb; 901 PetscBool flg; 902 PetscErrorCode ierr; 903 904 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 905 We need to filter the local indices that are represented through the DMDAGetElements decomposition 906 This is because the size of the local matrices in MATIS is the local size of the l2g map */ 907 PetscFunctionBegin; 908 dof = da->w; 909 dim = dm->dim; 910 911 ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr); 912 913 /* get local elements indices in local DMDA numbering */ 914 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 915 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 916 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 917 918 /* obtain a consistent local ordering for MATIS */ 919 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 920 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 921 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 922 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 923 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 924 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 925 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 926 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 927 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 928 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 929 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 930 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 931 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 932 933 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 934 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 935 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 936 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 937 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 938 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 939 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 940 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 941 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 942 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 943 ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 944 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 945 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 946 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 947 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 948 ierr = PetscFree(gidx);CHKERRQ(ierr); 949 950 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 951 flg = dm->prealloc_only; 952 dm->prealloc_only = PETSC_TRUE; 953 switch (dim) { 954 case 1: 955 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 956 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 957 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 958 break; 959 case 2: 960 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 961 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 962 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 963 break; 964 case 3: 965 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 966 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 967 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 968 break; 969 default: 970 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 971 break; 972 } 973 dm->prealloc_only = flg; 974 PetscFunctionReturn(0); 975 } 976 977 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 978 { 979 PetscErrorCode ierr; 980 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 981 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 982 MPI_Comm comm; 983 PetscScalar *values; 984 DMBoundaryType bx,by; 985 ISLocalToGlobalMapping ltog; 986 DMDAStencilType st; 987 988 PetscFunctionBegin; 989 /* 990 nc - number of components per grid point 991 col - number of colors needed in one direction for single component problem 992 993 */ 994 ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 995 col = 2*s + 1; 996 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 997 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 998 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 999 1000 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1001 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1002 1003 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1004 /* determine the matrix preallocation information */ 1005 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1006 for (i=xs; i<xs+nx; i++) { 1007 1008 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1009 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1010 1011 for (j=ys; j<ys+ny; j++) { 1012 slot = i - gxs + gnx*(j - gys); 1013 1014 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1015 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1016 1017 cnt = 0; 1018 for (k=0; k<nc; k++) { 1019 for (l=lstart; l<lend+1; l++) { 1020 for (p=pstart; p<pend+1; p++) { 1021 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1022 cols[cnt++] = k + nc*(slot + gnx*l + p); 1023 } 1024 } 1025 } 1026 rows[k] = k + nc*(slot); 1027 } 1028 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1029 } 1030 } 1031 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1032 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1033 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1034 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1035 1036 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1037 1038 /* 1039 For each node in the grid: we get the neighbors in the local (on processor ordering 1040 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1041 PETSc ordering. 1042 */ 1043 if (!da->prealloc_only) { 1044 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1045 for (i=xs; i<xs+nx; i++) { 1046 1047 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1048 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1049 1050 for (j=ys; j<ys+ny; j++) { 1051 slot = i - gxs + gnx*(j - gys); 1052 1053 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1054 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1055 1056 cnt = 0; 1057 for (k=0; k<nc; k++) { 1058 for (l=lstart; l<lend+1; l++) { 1059 for (p=pstart; p<pend+1; p++) { 1060 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1061 cols[cnt++] = k + nc*(slot + gnx*l + p); 1062 } 1063 } 1064 } 1065 rows[k] = k + nc*(slot); 1066 } 1067 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1068 } 1069 } 1070 ierr = PetscFree(values);CHKERRQ(ierr); 1071 /* do not copy values to GPU since they are all zero and not yet needed there */ 1072 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1073 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1074 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1075 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1076 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1077 } 1078 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1083 { 1084 PetscErrorCode ierr; 1085 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1086 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1087 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1088 MPI_Comm comm; 1089 PetscScalar *values; 1090 DMBoundaryType bx,by,bz; 1091 ISLocalToGlobalMapping ltog; 1092 DMDAStencilType st; 1093 1094 PetscFunctionBegin; 1095 /* 1096 nc - number of components per grid point 1097 col - number of colors needed in one direction for single component problem 1098 1099 */ 1100 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1101 col = 2*s + 1; 1102 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1103 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1104 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1105 1106 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1107 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1108 1109 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1110 /* determine the matrix preallocation information */ 1111 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1112 for (i=xs; i<xs+nx; i++) { 1113 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1114 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1115 for (j=ys; j<ys+ny; j++) { 1116 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1117 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1118 for (k=zs; k<zs+nz; k++) { 1119 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1120 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1121 1122 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1123 1124 cnt = 0; 1125 for (l=0; l<nc; l++) { 1126 for (ii=istart; ii<iend+1; ii++) { 1127 for (jj=jstart; jj<jend+1; jj++) { 1128 for (kk=kstart; kk<kend+1; kk++) { 1129 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1130 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1131 } 1132 } 1133 } 1134 } 1135 rows[l] = l + nc*(slot); 1136 } 1137 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1138 } 1139 } 1140 } 1141 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1142 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1143 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1144 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1145 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1146 1147 /* 1148 For each node in the grid: we get the neighbors in the local (on processor ordering 1149 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1150 PETSc ordering. 1151 */ 1152 if (!da->prealloc_only) { 1153 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1154 for (i=xs; i<xs+nx; i++) { 1155 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1156 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1157 for (j=ys; j<ys+ny; j++) { 1158 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1159 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1160 for (k=zs; k<zs+nz; k++) { 1161 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1162 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1163 1164 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1165 1166 cnt = 0; 1167 for (l=0; l<nc; l++) { 1168 for (ii=istart; ii<iend+1; ii++) { 1169 for (jj=jstart; jj<jend+1; jj++) { 1170 for (kk=kstart; kk<kend+1; kk++) { 1171 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1172 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1173 } 1174 } 1175 } 1176 } 1177 rows[l] = l + nc*(slot); 1178 } 1179 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1180 } 1181 } 1182 } 1183 ierr = PetscFree(values);CHKERRQ(ierr); 1184 /* do not copy values to GPU since they are all zero and not yet needed there */ 1185 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1186 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1187 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1188 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1189 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1190 } 1191 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1196 { 1197 PetscErrorCode ierr; 1198 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1199 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1200 MPI_Comm comm; 1201 DMBoundaryType bx,by; 1202 ISLocalToGlobalMapping ltog,mltog; 1203 DMDAStencilType st; 1204 PetscBool removedups = PETSC_FALSE; 1205 1206 PetscFunctionBegin; 1207 /* 1208 nc - number of components per grid point 1209 col - number of colors needed in one direction for single component problem 1210 1211 */ 1212 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 1213 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1214 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1215 } 1216 col = 2*s + 1; 1217 /* 1218 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1219 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1220 */ 1221 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1222 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1223 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 1224 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 1225 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1226 1227 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1228 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1229 1230 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1231 /* determine the matrix preallocation information */ 1232 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1233 for (i=xs; i<xs+nx; i++) { 1234 1235 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1236 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1237 1238 for (j=ys; j<ys+ny; j++) { 1239 slot = i - gxs + gnx*(j - gys); 1240 1241 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1242 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1243 1244 cnt = 0; 1245 for (k=0; k<nc; k++) { 1246 for (l=lstart; l<lend+1; l++) { 1247 for (p=pstart; p<pend+1; p++) { 1248 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1249 cols[cnt++] = k + nc*(slot + gnx*l + p); 1250 } 1251 } 1252 } 1253 rows[k] = k + nc*(slot); 1254 } 1255 if (removedups) { 1256 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1257 } else { 1258 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1259 } 1260 } 1261 } 1262 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1263 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1264 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1265 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1266 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1267 if (!mltog) { 1268 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1269 } 1270 1271 /* 1272 For each node in the grid: we get the neighbors in the local (on processor ordering 1273 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1274 PETSc ordering. 1275 */ 1276 if (!da->prealloc_only) { 1277 for (i=xs; i<xs+nx; i++) { 1278 1279 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1280 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1281 1282 for (j=ys; j<ys+ny; j++) { 1283 slot = i - gxs + gnx*(j - gys); 1284 1285 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1286 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1287 1288 cnt = 0; 1289 for (l=lstart; l<lend+1; l++) { 1290 for (p=pstart; p<pend+1; p++) { 1291 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1292 cols[cnt++] = nc*(slot + gnx*l + p); 1293 for (k=1; k<nc; k++) { 1294 cols[cnt] = 1 + cols[cnt-1];cnt++; 1295 } 1296 } 1297 } 1298 } 1299 for (k=0; k<nc; k++) rows[k] = k + nc*(slot); 1300 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1301 } 1302 } 1303 /* do not copy values to GPU since they are all zero and not yet needed there */ 1304 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1305 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1306 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1307 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1308 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1309 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1310 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1311 } 1312 } 1313 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1318 { 1319 PetscErrorCode ierr; 1320 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1321 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1322 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1323 DM_DA *dd = (DM_DA*)da->data; 1324 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1325 MPI_Comm comm; 1326 DMBoundaryType bx,by; 1327 ISLocalToGlobalMapping ltog; 1328 DMDAStencilType st; 1329 PetscBool removedups = PETSC_FALSE; 1330 1331 PetscFunctionBegin; 1332 /* 1333 nc - number of components per grid point 1334 col - number of colors needed in one direction for single component problem 1335 1336 */ 1337 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 1338 col = 2*s + 1; 1339 /* 1340 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1341 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1342 */ 1343 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1344 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1345 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 1346 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 1347 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1348 1349 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1350 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1351 1352 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1353 /* determine the matrix preallocation information */ 1354 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1355 for (i=xs; i<xs+nx; i++) { 1356 1357 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1358 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1359 1360 for (j=ys; j<ys+ny; j++) { 1361 slot = i - gxs + gnx*(j - gys); 1362 1363 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1364 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1365 1366 for (k=0; k<nc; k++) { 1367 cnt = 0; 1368 for (l=lstart; l<lend+1; l++) { 1369 for (p=pstart; p<pend+1; p++) { 1370 if (l || p) { 1371 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1372 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1373 } 1374 } else { 1375 if (dfill) { 1376 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1377 } else { 1378 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1379 } 1380 } 1381 } 1382 } 1383 row = k + nc*(slot); 1384 maxcnt = PetscMax(maxcnt,cnt); 1385 if (removedups) { 1386 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1387 } else { 1388 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1389 } 1390 } 1391 } 1392 } 1393 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1394 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1395 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1396 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1397 1398 /* 1399 For each node in the grid: we get the neighbors in the local (on processor ordering 1400 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1401 PETSc ordering. 1402 */ 1403 if (!da->prealloc_only) { 1404 for (i=xs; i<xs+nx; i++) { 1405 1406 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1407 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1408 1409 for (j=ys; j<ys+ny; j++) { 1410 slot = i - gxs + gnx*(j - gys); 1411 1412 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1413 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1414 1415 for (k=0; k<nc; k++) { 1416 cnt = 0; 1417 for (l=lstart; l<lend+1; l++) { 1418 for (p=pstart; p<pend+1; p++) { 1419 if (l || p) { 1420 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1421 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1422 } 1423 } else { 1424 if (dfill) { 1425 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1426 } else { 1427 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1428 } 1429 } 1430 } 1431 } 1432 row = k + nc*(slot); 1433 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1434 } 1435 } 1436 } 1437 /* do not copy values to GPU since they are all zero and not yet needed there */ 1438 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1439 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1440 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1441 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1442 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1443 } 1444 ierr = PetscFree(cols);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 /* ---------------------------------------------------------------------------------*/ 1449 1450 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1451 { 1452 PetscErrorCode ierr; 1453 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1454 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1455 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1456 MPI_Comm comm; 1457 DMBoundaryType bx,by,bz; 1458 ISLocalToGlobalMapping ltog,mltog; 1459 DMDAStencilType st; 1460 PetscBool removedups = PETSC_FALSE; 1461 1462 PetscFunctionBegin; 1463 /* 1464 nc - number of components per grid point 1465 col - number of colors needed in one direction for single component problem 1466 1467 */ 1468 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1469 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1470 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1471 } 1472 col = 2*s + 1; 1473 1474 /* 1475 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1476 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1477 */ 1478 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1479 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1480 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1481 1482 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1483 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1484 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1485 1486 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1487 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1488 1489 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1490 /* determine the matrix preallocation information */ 1491 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1492 for (i=xs; i<xs+nx; i++) { 1493 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1494 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1495 for (j=ys; j<ys+ny; j++) { 1496 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1497 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1498 for (k=zs; k<zs+nz; k++) { 1499 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1500 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1501 1502 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1503 1504 cnt = 0; 1505 for (l=0; l<nc; l++) { 1506 for (ii=istart; ii<iend+1; ii++) { 1507 for (jj=jstart; jj<jend+1; jj++) { 1508 for (kk=kstart; kk<kend+1; kk++) { 1509 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1510 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1511 } 1512 } 1513 } 1514 } 1515 rows[l] = l + nc*(slot); 1516 } 1517 if (removedups) { 1518 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1521 } 1522 } 1523 } 1524 } 1525 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1526 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1527 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1528 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1529 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1530 if (!mltog) { 1531 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1532 } 1533 1534 /* 1535 For each node in the grid: we get the neighbors in the local (on processor ordering 1536 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1537 PETSc ordering. 1538 */ 1539 if (!da->prealloc_only) { 1540 for (i=xs; i<xs+nx; i++) { 1541 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1542 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1543 for (j=ys; j<ys+ny; j++) { 1544 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1545 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1546 for (k=zs; k<zs+nz; k++) { 1547 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1548 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1549 1550 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1551 1552 cnt = 0; 1553 for (kk=kstart; kk<kend+1; kk++) { 1554 for (jj=jstart; jj<jend+1; jj++) { 1555 for (ii=istart; ii<iend+1; ii++) { 1556 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1557 cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk); 1558 for (l=1; l<nc; l++) { 1559 cols[cnt] = 1 + cols[cnt-1];cnt++; 1560 } 1561 } 1562 } 1563 } 1564 } 1565 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1566 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1567 } 1568 } 1569 } 1570 /* do not copy values to GPU since they are all zero and not yet needed there */ 1571 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1572 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1573 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1574 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1575 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1576 } 1577 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1578 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1579 } 1580 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 1584 /* ---------------------------------------------------------------------------------*/ 1585 1586 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1587 { 1588 PetscErrorCode ierr; 1589 DM_DA *dd = (DM_DA*)da->data; 1590 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1591 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1592 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1593 DMBoundaryType bx; 1594 ISLocalToGlobalMapping ltog; 1595 PetscMPIInt rank,size; 1596 1597 PetscFunctionBegin; 1598 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr); 1599 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr); 1600 1601 /* 1602 nc - number of components per grid point 1603 1604 */ 1605 ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr); 1606 if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1607 ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr); 1608 ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr); 1609 1610 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1611 ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr); 1612 1613 /* 1614 note should be smaller for first and last process with no periodic 1615 does not handle dfill 1616 */ 1617 cnt = 0; 1618 /* coupling with process to the left */ 1619 for (i=0; i<s; i++) { 1620 for (j=0; j<nc; j++) { 1621 ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1622 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1623 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1624 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1625 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1626 } 1627 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1628 cnt++; 1629 } 1630 } 1631 for (i=s; i<nx-s; i++) { 1632 for (j=0; j<nc; j++) { 1633 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1634 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1635 cnt++; 1636 } 1637 } 1638 /* coupling with process to the right */ 1639 for (i=nx-s; i<nx; i++) { 1640 for (j=0; j<nc; j++) { 1641 ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1642 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1643 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1644 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1645 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1646 } 1647 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1648 cnt++; 1649 } 1650 } 1651 1652 ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr); 1653 ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr); 1654 ierr = PetscFree2(cols,ocols);CHKERRQ(ierr); 1655 1656 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1657 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1658 1659 /* 1660 For each node in the grid: we get the neighbors in the local (on processor ordering 1661 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1662 PETSc ordering. 1663 */ 1664 if (!da->prealloc_only) { 1665 ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr); 1666 row = xs*nc; 1667 /* coupling with process to the left */ 1668 for (i=xs; i<xs+s; i++) { 1669 for (j=0; j<nc; j++) { 1670 cnt = 0; 1671 if (rank) { 1672 for (l=0; l<s; l++) { 1673 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1674 } 1675 } 1676 if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1677 for (l=0; l<s; l++) { 1678 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1679 } 1680 } 1681 if (dfill) { 1682 for (k=dfill[j]; k<dfill[j+1]; k++) { 1683 cols[cnt++] = i*nc + dfill[k]; 1684 } 1685 } else { 1686 for (k=0; k<nc; k++) { 1687 cols[cnt++] = i*nc + k; 1688 } 1689 } 1690 for (l=0; l<s; l++) { 1691 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1692 } 1693 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1694 row++; 1695 } 1696 } 1697 for (i=xs+s; i<xs+nx-s; i++) { 1698 for (j=0; j<nc; j++) { 1699 cnt = 0; 1700 for (l=0; l<s; l++) { 1701 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1702 } 1703 if (dfill) { 1704 for (k=dfill[j]; k<dfill[j+1]; k++) { 1705 cols[cnt++] = i*nc + dfill[k]; 1706 } 1707 } else { 1708 for (k=0; k<nc; k++) { 1709 cols[cnt++] = i*nc + k; 1710 } 1711 } 1712 for (l=0; l<s; l++) { 1713 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1714 } 1715 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1716 row++; 1717 } 1718 } 1719 /* coupling with process to the right */ 1720 for (i=xs+nx-s; i<xs+nx; i++) { 1721 for (j=0; j<nc; j++) { 1722 cnt = 0; 1723 for (l=0; l<s; l++) { 1724 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1725 } 1726 if (dfill) { 1727 for (k=dfill[j]; k<dfill[j+1]; k++) { 1728 cols[cnt++] = i*nc + dfill[k]; 1729 } 1730 } else { 1731 for (k=0; k<nc; k++) { 1732 cols[cnt++] = i*nc + k; 1733 } 1734 } 1735 if (rank < size-1) { 1736 for (l=0; l<s; l++) { 1737 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1738 } 1739 } 1740 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1741 for (l=0; l<s; l++) { 1742 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1743 } 1744 } 1745 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1746 row++; 1747 } 1748 } 1749 ierr = PetscFree(cols);CHKERRQ(ierr); 1750 /* do not copy values to GPU since they are all zero and not yet needed there */ 1751 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1752 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1753 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1754 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1755 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 /* ---------------------------------------------------------------------------------*/ 1761 1762 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1763 { 1764 PetscErrorCode ierr; 1765 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1766 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1767 PetscInt istart,iend; 1768 DMBoundaryType bx; 1769 ISLocalToGlobalMapping ltog,mltog; 1770 1771 PetscFunctionBegin; 1772 /* 1773 nc - number of components per grid point 1774 col - number of colors needed in one direction for single component problem 1775 1776 */ 1777 ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr); 1778 if (!isIS && bx == DM_BOUNDARY_NONE) { 1779 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1780 } 1781 col = 2*s + 1; 1782 1783 ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr); 1784 ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr); 1785 1786 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1787 ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr); 1788 ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr); 1789 1790 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1791 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1792 if (!mltog) { 1793 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1794 } 1795 1796 /* 1797 For each node in the grid: we get the neighbors in the local (on processor ordering 1798 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1799 PETSc ordering. 1800 */ 1801 if (!da->prealloc_only) { 1802 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1803 for (i=xs; i<xs+nx; i++) { 1804 istart = PetscMax(-s,gxs - i); 1805 iend = PetscMin(s,gxs + gnx - i - 1); 1806 slot = i - gxs; 1807 1808 cnt = 0; 1809 for (i1=istart; i1<iend+1; i1++) { 1810 cols[cnt++] = nc*(slot + i1); 1811 for (l=1; l<nc; l++) { 1812 cols[cnt] = 1 + cols[cnt-1];cnt++; 1813 } 1814 } 1815 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1816 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1817 } 1818 /* do not copy values to GPU since they are all zero and not yet needed there */ 1819 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1820 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1821 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1822 if (!isIS && bx == DM_BOUNDARY_NONE) { 1823 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1824 } 1825 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1826 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1827 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1828 } 1829 PetscFunctionReturn(0); 1830 } 1831 1832 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1833 { 1834 PetscErrorCode ierr; 1835 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1836 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1837 PetscInt istart,iend,jstart,jend,ii,jj; 1838 MPI_Comm comm; 1839 PetscScalar *values; 1840 DMBoundaryType bx,by; 1841 DMDAStencilType st; 1842 ISLocalToGlobalMapping ltog; 1843 1844 PetscFunctionBegin; 1845 /* 1846 nc - number of components per grid point 1847 col - number of colors needed in one direction for single component problem 1848 */ 1849 ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 1850 col = 2*s + 1; 1851 1852 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 1853 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 1854 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1855 1856 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 1857 1858 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1859 1860 /* determine the matrix preallocation information */ 1861 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 1862 for (i=xs; i<xs+nx; i++) { 1863 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1864 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1865 for (j=ys; j<ys+ny; j++) { 1866 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1867 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1868 slot = i - gxs + gnx*(j - gys); 1869 1870 /* Find block columns in block row */ 1871 cnt = 0; 1872 for (ii=istart; ii<iend+1; ii++) { 1873 for (jj=jstart; jj<jend+1; jj++) { 1874 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1875 cols[cnt++] = slot + ii + gnx*jj; 1876 } 1877 } 1878 } 1879 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1880 } 1881 } 1882 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1883 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1884 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1885 1886 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1887 1888 /* 1889 For each node in the grid: we get the neighbors in the local (on processor ordering 1890 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1891 PETSc ordering. 1892 */ 1893 if (!da->prealloc_only) { 1894 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1895 for (i=xs; i<xs+nx; i++) { 1896 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1897 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1898 for (j=ys; j<ys+ny; j++) { 1899 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1900 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1901 slot = i - gxs + gnx*(j - gys); 1902 cnt = 0; 1903 for (ii=istart; ii<iend+1; ii++) { 1904 for (jj=jstart; jj<jend+1; jj++) { 1905 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1906 cols[cnt++] = slot + ii + gnx*jj; 1907 } 1908 } 1909 } 1910 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1911 } 1912 } 1913 ierr = PetscFree(values);CHKERRQ(ierr); 1914 /* do not copy values to GPU since they are all zero and not yet needed there */ 1915 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1916 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1917 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1918 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1919 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1920 } 1921 ierr = PetscFree(cols);CHKERRQ(ierr); 1922 PetscFunctionReturn(0); 1923 } 1924 1925 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1926 { 1927 PetscErrorCode ierr; 1928 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1929 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1930 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1931 MPI_Comm comm; 1932 PetscScalar *values; 1933 DMBoundaryType bx,by,bz; 1934 DMDAStencilType st; 1935 ISLocalToGlobalMapping ltog; 1936 1937 PetscFunctionBegin; 1938 /* 1939 nc - number of components per grid point 1940 col - number of colors needed in one direction for single component problem 1941 1942 */ 1943 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1944 col = 2*s + 1; 1945 1946 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1947 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1948 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1949 1950 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 1951 1952 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1953 1954 /* determine the matrix preallocation information */ 1955 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1956 for (i=xs; i<xs+nx; i++) { 1957 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1958 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1959 for (j=ys; j<ys+ny; j++) { 1960 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1961 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1962 for (k=zs; k<zs+nz; k++) { 1963 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1964 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1965 1966 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1967 1968 /* Find block columns in block row */ 1969 cnt = 0; 1970 for (ii=istart; ii<iend+1; ii++) { 1971 for (jj=jstart; jj<jend+1; jj++) { 1972 for (kk=kstart; kk<kend+1; kk++) { 1973 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1974 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1975 } 1976 } 1977 } 1978 } 1979 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1980 } 1981 } 1982 } 1983 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1984 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1985 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1986 1987 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1988 1989 /* 1990 For each node in the grid: we get the neighbors in the local (on processor ordering 1991 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1992 PETSc ordering. 1993 */ 1994 if (!da->prealloc_only) { 1995 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 1996 for (i=xs; i<xs+nx; i++) { 1997 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1998 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1999 for (j=ys; j<ys+ny; j++) { 2000 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2001 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2002 for (k=zs; k<zs+nz; k++) { 2003 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2004 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2005 2006 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2007 2008 cnt = 0; 2009 for (ii=istart; ii<iend+1; ii++) { 2010 for (jj=jstart; jj<jend+1; jj++) { 2011 for (kk=kstart; kk<kend+1; kk++) { 2012 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2013 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2014 } 2015 } 2016 } 2017 } 2018 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2019 } 2020 } 2021 } 2022 ierr = PetscFree(values);CHKERRQ(ierr); 2023 /* do not copy values to GPU since they are all zero and not yet needed there */ 2024 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2025 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2026 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2027 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2028 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2029 } 2030 ierr = PetscFree(cols);CHKERRQ(ierr); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 /* 2035 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2036 identify in the local ordering with periodic domain. 2037 */ 2038 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2039 { 2040 PetscErrorCode ierr; 2041 PetscInt i,n; 2042 2043 PetscFunctionBegin; 2044 ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr); 2045 ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr); 2046 for (i=0,n=0; i<*cnt; i++) { 2047 if (col[i] >= *row) col[n++] = col[i]; 2048 } 2049 *cnt = n; 2050 PetscFunctionReturn(0); 2051 } 2052 2053 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2054 { 2055 PetscErrorCode ierr; 2056 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2057 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2058 PetscInt istart,iend,jstart,jend,ii,jj; 2059 MPI_Comm comm; 2060 PetscScalar *values; 2061 DMBoundaryType bx,by; 2062 DMDAStencilType st; 2063 ISLocalToGlobalMapping ltog; 2064 2065 PetscFunctionBegin; 2066 /* 2067 nc - number of components per grid point 2068 col - number of colors needed in one direction for single component problem 2069 */ 2070 ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 2071 col = 2*s + 1; 2072 2073 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 2074 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 2075 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2076 2077 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 2078 2079 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2080 2081 /* determine the matrix preallocation information */ 2082 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2083 for (i=xs; i<xs+nx; i++) { 2084 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2085 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2086 for (j=ys; j<ys+ny; j++) { 2087 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2088 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2089 slot = i - gxs + gnx*(j - gys); 2090 2091 /* Find block columns in block row */ 2092 cnt = 0; 2093 for (ii=istart; ii<iend+1; ii++) { 2094 for (jj=jstart; jj<jend+1; jj++) { 2095 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2096 cols[cnt++] = slot + ii + gnx*jj; 2097 } 2098 } 2099 } 2100 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2101 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2102 } 2103 } 2104 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2105 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2106 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2107 2108 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2109 2110 /* 2111 For each node in the grid: we get the neighbors in the local (on processor ordering 2112 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2113 PETSc ordering. 2114 */ 2115 if (!da->prealloc_only) { 2116 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 2117 for (i=xs; i<xs+nx; i++) { 2118 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2119 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2120 for (j=ys; j<ys+ny; j++) { 2121 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2122 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2123 slot = i - gxs + gnx*(j - gys); 2124 2125 /* Find block columns in block row */ 2126 cnt = 0; 2127 for (ii=istart; ii<iend+1; ii++) { 2128 for (jj=jstart; jj<jend+1; jj++) { 2129 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2130 cols[cnt++] = slot + ii + gnx*jj; 2131 } 2132 } 2133 } 2134 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2135 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2136 } 2137 } 2138 ierr = PetscFree(values);CHKERRQ(ierr); 2139 /* do not copy values to GPU since they are all zero and not yet needed there */ 2140 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2141 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2144 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2145 } 2146 ierr = PetscFree(cols);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2151 { 2152 PetscErrorCode ierr; 2153 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2154 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2155 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2156 MPI_Comm comm; 2157 PetscScalar *values; 2158 DMBoundaryType bx,by,bz; 2159 DMDAStencilType st; 2160 ISLocalToGlobalMapping ltog; 2161 2162 PetscFunctionBegin; 2163 /* 2164 nc - number of components per grid point 2165 col - number of colors needed in one direction for single component problem 2166 */ 2167 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2168 col = 2*s + 1; 2169 2170 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2171 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2172 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2173 2174 /* create the matrix */ 2175 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 2176 2177 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2178 2179 /* determine the matrix preallocation information */ 2180 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2181 for (i=xs; i<xs+nx; i++) { 2182 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2183 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2184 for (j=ys; j<ys+ny; j++) { 2185 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2186 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2187 for (k=zs; k<zs+nz; k++) { 2188 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2189 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2190 2191 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2192 2193 /* Find block columns in block row */ 2194 cnt = 0; 2195 for (ii=istart; ii<iend+1; ii++) { 2196 for (jj=jstart; jj<jend+1; jj++) { 2197 for (kk=kstart; kk<kend+1; kk++) { 2198 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2199 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2200 } 2201 } 2202 } 2203 } 2204 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2205 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2206 } 2207 } 2208 } 2209 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2210 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2211 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2212 2213 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2214 2215 /* 2216 For each node in the grid: we get the neighbors in the local (on processor ordering 2217 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2218 PETSc ordering. 2219 */ 2220 if (!da->prealloc_only) { 2221 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2222 for (i=xs; i<xs+nx; i++) { 2223 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2224 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2225 for (j=ys; j<ys+ny; j++) { 2226 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2227 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2228 for (k=zs; k<zs+nz; k++) { 2229 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2230 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2231 2232 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2233 2234 cnt = 0; 2235 for (ii=istart; ii<iend+1; ii++) { 2236 for (jj=jstart; jj<jend+1; jj++) { 2237 for (kk=kstart; kk<kend+1; kk++) { 2238 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2239 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2240 } 2241 } 2242 } 2243 } 2244 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2245 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2246 } 2247 } 2248 } 2249 ierr = PetscFree(values);CHKERRQ(ierr); 2250 /* do not copy values to GPU since they are all zero and not yet needed there */ 2251 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2252 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2253 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2254 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2255 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2256 } 2257 ierr = PetscFree(cols);CHKERRQ(ierr); 2258 PetscFunctionReturn(0); 2259 } 2260 2261 /* ---------------------------------------------------------------------------------*/ 2262 2263 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2264 { 2265 PetscErrorCode ierr; 2266 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2267 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2268 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2269 DM_DA *dd = (DM_DA*)da->data; 2270 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2271 MPI_Comm comm; 2272 PetscScalar *values; 2273 DMBoundaryType bx,by,bz; 2274 ISLocalToGlobalMapping ltog; 2275 DMDAStencilType st; 2276 PetscBool removedups = PETSC_FALSE; 2277 2278 PetscFunctionBegin; 2279 /* 2280 nc - number of components per grid point 2281 col - number of colors needed in one direction for single component problem 2282 2283 */ 2284 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2285 col = 2*s + 1; 2286 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2287 by 2*stencil_width + 1\n"); 2288 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2289 by 2*stencil_width + 1\n"); 2290 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2291 by 2*stencil_width + 1\n"); 2292 2293 /* 2294 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2295 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2296 */ 2297 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2298 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2299 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2300 2301 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2302 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2303 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2304 2305 ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr); 2306 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2307 2308 /* determine the matrix preallocation information */ 2309 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2310 2311 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 2312 for (i=xs; i<xs+nx; i++) { 2313 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2314 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2315 for (j=ys; j<ys+ny; j++) { 2316 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2317 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2318 for (k=zs; k<zs+nz; k++) { 2319 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2320 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2321 2322 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2323 2324 for (l=0; l<nc; l++) { 2325 cnt = 0; 2326 for (ii=istart; ii<iend+1; ii++) { 2327 for (jj=jstart; jj<jend+1; jj++) { 2328 for (kk=kstart; kk<kend+1; kk++) { 2329 if (ii || jj || kk) { 2330 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2331 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2332 } 2333 } else { 2334 if (dfill) { 2335 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2336 } else { 2337 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2338 } 2339 } 2340 } 2341 } 2342 } 2343 row = l + nc*(slot); 2344 maxcnt = PetscMax(maxcnt,cnt); 2345 if (removedups) { 2346 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2347 } else { 2348 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2349 } 2350 } 2351 } 2352 } 2353 } 2354 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 2355 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 2356 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2357 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2358 2359 /* 2360 For each node in the grid: we get the neighbors in the local (on processor ordering 2361 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2362 PETSc ordering. 2363 */ 2364 if (!da->prealloc_only) { 2365 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 2366 for (i=xs; i<xs+nx; i++) { 2367 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2368 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2369 for (j=ys; j<ys+ny; j++) { 2370 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2371 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2372 for (k=zs; k<zs+nz; k++) { 2373 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2374 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2375 2376 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2377 2378 for (l=0; l<nc; l++) { 2379 cnt = 0; 2380 for (ii=istart; ii<iend+1; ii++) { 2381 for (jj=jstart; jj<jend+1; jj++) { 2382 for (kk=kstart; kk<kend+1; kk++) { 2383 if (ii || jj || kk) { 2384 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2385 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2386 } 2387 } else { 2388 if (dfill) { 2389 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2390 } else { 2391 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2392 } 2393 } 2394 } 2395 } 2396 } 2397 row = l + nc*(slot); 2398 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2399 } 2400 } 2401 } 2402 } 2403 ierr = PetscFree(values);CHKERRQ(ierr); 2404 /* do not copy values to GPU since they are all zero and not yet needed there */ 2405 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2406 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2407 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2408 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2409 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2410 } 2411 ierr = PetscFree(cols);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414