#include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/
#include <petscmat.h>

extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);

/*
   For ghost i that may be negative or greater than the upper bound this
   maps it into the 0:m-1 range using periodicity
*/
#define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))

/*
   Converts a dense w-by-w 0/1 fill pattern (dfill, row-major) into the modified CSR
   form stored in *rfill: the first w+1 entries are row starts (fill[i+1]-fill[i] is
   the number of nonzeros in row i), followed by the column indices of the nonzeros.
   A NULL dfill means "dense block" and leaves *rfill untouched.
   Caller owns the returned array.
*/
static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
{
  PetscErrorCode ierr;
  PetscInt       i,j,nz,*fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(0);

  /* count number nonzeros */
  nz = 0;
  for (i=0; i<w; i++) {
    for (j=0; j<w; j++) {
      if (dfill[w*i+j]) nz++;
    }
  }
  ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
  /* construct modified CSR storage of nonzero structure */
  /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
     so fill[1] - fill[0] gives number of nonzeros in first row etc */
  nz = w + 1;  /* column indices begin right after the w+1 row pointers */
  for (i=0; i<w; i++) {
    fill[i] = nz;
    for (j=0; j<w; j++) {
      if (dfill[w*i+j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(0);
}


/*
   Copies a fill pattern that is already in the modified CSR ("sparse") form described
   above into freshly allocated storage in *rfill. NULL input is a no-op.
*/
static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
{
  PetscErrorCode ierr;
  PetscInt       nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(0);

  /* Determine number of non-zeros: entry w holds nz + w + 1 (the total length) */
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation.
   */
  ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
  ierr = PetscMemcpy(*rfill,dfillsparse,(nz+w+1)*sizeof(PetscInt));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/*
   Builds dd->ofillcols from dd->ofill: for each of the dd->w components, records a
   1-based rank among the components that have any off-diagonal coupling, or 0 if the
   component has none. Used by the 1d coloring to reduce the number of colors.
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscErrorCode ierr;
  PetscInt       i,k,cnt = 1;

  PetscFunctionBegin;

  /* ofillcount tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
  for (i=0; i<dd->w; i++) {
    /* mark every column that appears in some row of the off-diagonal fill */
    for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i=0; i<dd->w; i++) {
    if (dd->ofillcols[i]) {
      dd->ofillcols[i] = cnt++;
    }
  }
  PetscFunctionReturn(0);
}



/*@
    DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix().

    Logically Collective on DMDA

    Input Parameter:
+   da - the distributed array
.   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the fill pattern in the off-diagonal blocks


    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

           The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
       representing coupling and 0 entries for missing coupling. For example
$             dfill[9] = {1, 0, 0,
$                         1, 1, 0,
$                         0, 1, 1}
       means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
       itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
       diagonal block).

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}


/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on DMDA

    Input Parameter:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks


    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

       The format for dfill and ofill is a sparse representation of a
       dof-by-dof matrix with 1 entries representing coupling and 0 entries
       for missing coupling. The sparse representation is a 1 dimensional
       array of length nz + dof + 1, where nz is the number of non-zeros in
       the matrix. The first dof entries in the array give the
       starting array indices of each row's items in the rest of the array,
       the dof+1st item contains the value nz + dof + 1 (i.e.
 the entire length of the array)
       and the remaining nz items give the column indices of each of
       the 1s within the logical 2D matrix. Each row's items within
       the array are the column indices of the 1s within that row
       of the 2D matrix. PETSc developers may recognize that this is the
       same format as that computed by the DMDASetBlockFills_Private()
       function from a dense 2D matrix representation.

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Philip C. Roth

.seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}


/*
   Dispatches coloring creation to the dimension-specific routines below.
*/
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode ierr;
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------               |
         |               |                    |               |
      n  |           yn  |                    |               |
         |               |                    |               |
         |               .---------------------               |
         |             (xs,ys)     xn                         |
         |            .
                                                  |
         |            (gxs,gys)                               |
         |                                                    |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);

  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process the ghosted and global colorings coincide */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction of extent 1 would wrap onto the same process, which the local coloring cannot represent */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
  if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (isBAIJ) {
    /* temporarily rescale the DMDA extents to block units; undone after the dispatch below */
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being more
   more low-level then matrices.
266 */ 267 if (dim == 1) { 268 ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 269 } else if (dim == 2) { 270 ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 271 } else if (dim == 3) { 272 ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 273 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); 274 if (isBAIJ) { 275 dd->w = nc; 276 dd->xs = dd->xs*nc; 277 dd->xe = dd->xe*nc; 278 dd->Xs = dd->Xs*nc; 279 dd->Xe = dd->Xe*nc; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 /* ---------------------------------------------------------------------------------*/ 285 286 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 287 { 288 PetscErrorCode ierr; 289 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col; 290 PetscInt ncolors; 291 MPI_Comm comm; 292 DMBoundaryType bx,by; 293 DMDAStencilType st; 294 ISColoringValue *colors; 295 DM_DA *dd = (DM_DA*)da->data; 296 297 PetscFunctionBegin; 298 /* 299 nc - number of components per grid point 300 col - number of colors needed in one direction for single component problem 301 302 */ 303 ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 304 col = 2*s + 1; 305 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 306 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 307 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 308 309 /* special case as taught to us by Paul Hovland */ 310 if (st == DMDA_STENCIL_STAR && s == 1) { 311 ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 312 } else { 313 314 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\ 315 by 2*stencil_width + 1 (%d)\n", m, col); 316 if (by 
 == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
by 2*stencil_width + 1 (%d)\n", n, col);
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
        ii   = 0;
        /* one color per component within each col-by-col tile of owned points */
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        ncolors = nc + nc*(col-1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  /* caller receives a new reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   3d analogue of the 2d coloring: colors repeat with period col = 2*s+1 in each of
   the three grid directions, one color per component within each col^3 tile.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  /* with periodic boundaries the modular color pattern only tiles correctly when each extent divides evenly */
  if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr =
 PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
      ii = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* caller receives a new reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   1d coloring. When DMDASetBlockFills() supplied per-component off-diagonal fill
   information (dd->ofillcols) a reduced number of colors is used; otherwise the
   standard modular pattern with period col = 2*s+1 is applied.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
  col  = 2*s + 1;

  /* with a periodic boundary the modular color pattern only tiles correctly when the extent divides evenly */
  if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
by 2*stencil_width + 1 %d\n",(int)m,(int)col);

  ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
  ierr =
 PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components that have any off-diagonal (neighbor) coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled components off the base point get colors above the first nc */
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled components (or the base point of each tile) reuse color l */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* caller receives a new reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Special 5-color scheme for the 2d star stencil with width 1 (5-point stencil):
   color(i,j,component k) = k + nc*((3*j+i) mod 5) gives a valid coloring with only
   5*nc colors instead of 9*nc.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  ISColoringValue
 *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
  ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* the (3*j+i) mod 5 pattern only tiles periodic directions when the extent is divisible by 5 */
  if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
  if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            /* SetInRange maps ghost indices back into the domain to handle periodic boundaries */
            colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else
SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 567 PetscFunctionReturn(0); 568 } 569 570 /* =========================================================================== */ 571 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat); 572 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 573 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat); 574 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 575 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat); 576 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 577 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 578 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 579 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 580 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 581 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 582 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 583 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 584 585 /*@C 586 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 587 588 Logically Collective on Mat 589 590 Input Parameters: 591 + mat - the matrix 592 - da - the da 593 594 Level: intermediate 595 596 @*/ 597 PetscErrorCode MatSetupDM(Mat mat,DM da) 598 { 599 PetscErrorCode ierr; 600 601 PetscFunctionBegin; 602 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 603 PetscValidHeaderSpecificType(da,DM_CLASSID,1,DMDA); 604 ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 609 { 610 DM da; 611 PetscErrorCode ierr; 612 const char *prefix; 613 Mat Anatural; 614 AO ao; 615 PetscInt rstart,rend,*petsc,i; 616 IS is; 617 MPI_Comm comm; 618 PetscViewerFormat format; 619 620 PetscFunctionBegin; 621 /* Check whether we are just printing info, in which case MatView() already viewed 
     everything we wanted to view */
  ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* build an IS mapping this process's rows to the natural (application) ordering */
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
  ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* call viewer on natural ordering */
  ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
  /* suppress the class/name/prefix header so the permuted copy prints as if it were A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Loads a DMDA-generated parallel matrix stored in the natural ordering, then
   permutes it into the DMDA's PETSc ordering and replaces A's content in place.
*/
PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM             da;
  PetscErrorCode ierr;
  Mat            Anatural,Aapp;
  AO             ao;
  PetscInt       rstart,rend,*app,i,m,n,M,N;
  IS             is;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
  /* Load the matrix in natural ordering */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
  ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
  ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);

  /* Map natural ordering to application ordering and create IS */
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
  ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* Do permutation and replace header */
  ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
  ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Creates and preallocates the matrix for a DMDA, dispatching on the matrix type's
   available preallocation routines (AIJ/BAIJ/SBAIJ/SELL/IS). Output *J is a new
   matrix owned by the caller.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscErrorCode ierr;
  PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat            A;
  MPI_Comm       comm;
  MatType        Atype;
  PetscSection   section, sectionGlobal;
  void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType        mtype;
  PetscMPIInt    size;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  ierr  = MatInitializePackage();CHKERRQ(ierr);
  mtype = da->mattype;

  /* if a PetscSection is attached, size the matrix from the section instead of the DMDA grid */
  ierr = DMGetSection(da, &section);CHKERRQ(ierr);
  if (section) {
    PetscInt  bs = -1;
    PetscInt  localSize;
    PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;

    ierr = DMGetGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
    ierr =
PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr); 713 ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr); 714 ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 715 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 716 ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr); 717 ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr); 718 ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr); 719 ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr); 720 ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr); 721 ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr); 722 ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr); 723 /* Check for symmetric storage */ 724 isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock); 725 if (isSymmetric) { 726 ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr); 727 } 728 if (!isShell) { 729 PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal; 730 731 if (bs < 0) { 732 if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) { 733 PetscInt pStart, pEnd, p, dof; 734 735 ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr); 736 for (p = pStart; p < pEnd; ++p) { 737 ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr); 738 if (dof) { 739 bs = dof; 740 break; 741 } 742 } 743 } else { 744 bs = 1; 745 } 746 /* Must have same blocksize on all procs (some might have no points) */ 747 bsLocal = bs; 748 ierr = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr); 749 } 750 ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr); 751 /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */ 752 ierr = PetscFree4(dnz, onz, dnzu, 
 onzu);CHKERRQ(ierr);
    }
  }
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------               |
         |               |                    |               |
      n  |           ny  |                    |               |
         |               |                    |               |
         |               .---------------------               |
         |             (xs,ys)     nx                         |
         |            .                                       |
         |         (gxs,gys)                                  |
         |                                                    |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
  ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  /* size the matrix from the owned grid points times components per point */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
  ierr = MatSetType(A,mtype);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  if (da->structure_only) {
    ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
  /*
     We do not provide a getmatrix function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being more
   more low-level than matrices. This is kind of cheating but, cause sometimes
   we think of DMDA has higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
803 */ 804 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 805 if (!aij) { 806 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 807 } 808 if (!aij) { 809 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 810 if (!baij) { 811 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 812 } 813 if (!baij) { 814 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 815 if (!sbaij) { 816 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 817 } 818 if (!sbaij) { 819 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr); 820 if (!sell) { 821 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr); 822 } 823 } 824 if (!sell) { 825 ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr); 826 } 827 } 828 } 829 if (aij) { 830 if (dim == 1) { 831 if (dd->ofill) { 832 ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 833 } else { 834 ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr); 835 } 836 } else if (dim == 2) { 837 if (dd->ofill) { 838 ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 839 } else { 840 ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr); 841 } 842 } else if (dim == 3) { 843 if (dd->ofill) { 844 ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 845 } else { 846 ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr); 847 } 848 } 849 } else if (baij) { 850 if (dim == 2) { 851 ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr); 852 } else if (dim == 3) { 853 ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr); 854 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D 
dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 855 } else if (sbaij) { 856 if (dim == 2) { 857 ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr); 858 } else if (dim == 3) { 859 ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr); 860 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 861 } else if (sell) { 862 if (dim == 2) { 863 ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr); 864 } else if (dim == 3) { 865 ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr); 866 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 867 } else if (is) { 868 ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr); 869 } else { 870 ISLocalToGlobalMapping ltog; 871 872 ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr); 873 ierr = MatSetUp(A);CHKERRQ(ierr); 874 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 875 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr); 876 } 877 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr); 878 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr); 879 ierr = MatSetDM(A,da);CHKERRQ(ierr); 880 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 881 if (size > 1) { 882 /* change viewer to display matrix in natural ordering */ 883 ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr); 884 ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr); 885 } 886 *J = A; 887 PetscFunctionReturn(0); 888 } 889 890 /* ---------------------------------------------------------------------------------*/ 891 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const 
PetscInt[],PetscInt,const PetscInt[]); 892 893 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 894 { 895 DM_DA *da = (DM_DA*)dm->data; 896 Mat lJ; 897 ISLocalToGlobalMapping ltog; 898 IS is_loc_filt, is_glob; 899 const PetscInt *e_loc,*idx; 900 PetscInt nel,nen,nv,dof,dim,*gidx,nb; 901 PetscBool flg; 902 PetscErrorCode ierr; 903 904 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 905 We need to filter the local indices that are represented through the DMDAGetElements decomposition 906 This is because the size of the local matrices in MATIS is the local size of the l2g map */ 907 PetscFunctionBegin; 908 dof = da->w; 909 dim = dm->dim; 910 911 ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr); 912 913 /* get local elements indices in local DMDA numbering */ 914 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 915 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 916 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 917 918 /* obtain a consistent local ordering for MATIS */ 919 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 920 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 921 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 922 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 923 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 924 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 925 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 926 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 927 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 928 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 929 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 930 ierr = 
MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 931 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 932 933 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 934 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 935 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 936 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 937 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 938 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 939 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 940 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 941 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 942 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 943 ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 944 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 945 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 946 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 947 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 948 ierr = PetscFree(gidx);CHKERRQ(ierr); 949 950 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 951 flg = dm->prealloc_only; 952 dm->prealloc_only = PETSC_TRUE; 953 switch (dim) { 954 case 1: 955 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 956 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J);CHKERRQ(ierr); 957 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 958 break; 959 case 2: 960 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 961 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J);CHKERRQ(ierr); 962 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 963 break; 
964 case 3: 965 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 966 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J);CHKERRQ(ierr); 967 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 968 break; 969 default: 970 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 971 break; 972 } 973 dm->prealloc_only = flg; 974 PetscFunctionReturn(0); 975 } 976 977 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 978 { 979 PetscErrorCode ierr; 980 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 981 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 982 MPI_Comm comm; 983 PetscScalar *values; 984 DMBoundaryType bx,by; 985 ISLocalToGlobalMapping ltog; 986 DMDAStencilType st; 987 988 PetscFunctionBegin; 989 /* 990 nc - number of components per grid point 991 col - number of colors needed in one direction for single component problem 992 993 */ 994 ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 995 col = 2*s + 1; 996 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 997 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 998 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 999 1000 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1001 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1002 1003 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1004 /* determine the matrix preallocation information */ 1005 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1006 for (i=xs; i<xs+nx; i++) { 1007 1008 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1009 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1010 1011 for (j=ys; j<ys+ny; j++) { 1012 slot = i - gxs + gnx*(j - gys); 1013 1014 lstart = (by == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s,-j)); 1015 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1016 1017 cnt = 0; 1018 for (k=0; k<nc; k++) { 1019 for (l=lstart; l<lend+1; l++) { 1020 for (p=pstart; p<pend+1; p++) { 1021 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1022 cols[cnt++] = k + nc*(slot + gnx*l + p); 1023 } 1024 } 1025 } 1026 rows[k] = k + nc*(slot); 1027 } 1028 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1029 } 1030 } 1031 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1032 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1033 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1034 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1035 1036 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1037 1038 /* 1039 For each node in the grid: we get the neighbors in the local (on processor ordering 1040 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1041 PETSc ordering. 1042 */ 1043 if (!da->prealloc_only) { 1044 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1045 for (i=xs; i<xs+nx; i++) { 1046 1047 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1048 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1049 1050 for (j=ys; j<ys+ny; j++) { 1051 slot = i - gxs + gnx*(j - gys); 1052 1053 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1054 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1055 1056 cnt = 0; 1057 for (k=0; k<nc; k++) { 1058 for (l=lstart; l<lend+1; l++) { 1059 for (p=pstart; p<pend+1; p++) { 1060 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1061 cols[cnt++] = k + nc*(slot + gnx*l + p); 1062 } 1063 } 1064 } 1065 rows[k] = k + nc*(slot); 1066 } 1067 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1068 } 1069 } 1070 ierr = PetscFree(values);CHKERRQ(ierr); 1071 /* do not copy values to GPU since they are all zero and not yet needed there */ 1072 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1073 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1074 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1075 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1076 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1077 } 1078 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1083 { 1084 PetscErrorCode ierr; 1085 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1086 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1087 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1088 MPI_Comm comm; 1089 PetscScalar *values; 1090 DMBoundaryType bx,by,bz; 1091 ISLocalToGlobalMapping ltog; 1092 DMDAStencilType st; 1093 1094 PetscFunctionBegin; 1095 /* 1096 nc - number of components per grid point 1097 col - number of colors needed in one direction for single component problem 1098 1099 */ 1100 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1101 col = 2*s + 1; 1102 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1103 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1104 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1105 1106 ierr = 
PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1107 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1108 1109 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1110 /* determine the matrix preallocation information */ 1111 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1112 for (i=xs; i<xs+nx; i++) { 1113 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1114 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1115 for (j=ys; j<ys+ny; j++) { 1116 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1117 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1118 for (k=zs; k<zs+nz; k++) { 1119 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1120 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1121 1122 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1123 1124 cnt = 0; 1125 for (l=0; l<nc; l++) { 1126 for (ii=istart; ii<iend+1; ii++) { 1127 for (jj=jstart; jj<jend+1; jj++) { 1128 for (kk=kstart; kk<kend+1; kk++) { 1129 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1130 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1131 } 1132 } 1133 } 1134 } 1135 rows[l] = l + nc*(slot); 1136 } 1137 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1138 } 1139 } 1140 } 1141 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1142 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1143 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1144 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1145 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1146 1147 /* 1148 For each node in the grid: we get the neighbors in the local (on processor ordering 1149 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1150 PETSc ordering. 
1151 */ 1152 if (!da->prealloc_only) { 1153 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1154 for (i=xs; i<xs+nx; i++) { 1155 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1156 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1157 for (j=ys; j<ys+ny; j++) { 1158 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1159 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1160 for (k=zs; k<zs+nz; k++) { 1161 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1162 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1163 1164 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1165 1166 cnt = 0; 1167 for (l=0; l<nc; l++) { 1168 for (ii=istart; ii<iend+1; ii++) { 1169 for (jj=jstart; jj<jend+1; jj++) { 1170 for (kk=kstart; kk<kend+1; kk++) { 1171 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1172 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1173 } 1174 } 1175 } 1176 } 1177 rows[l] = l + nc*(slot); 1178 } 1179 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1180 } 1181 } 1182 } 1183 ierr = PetscFree(values);CHKERRQ(ierr); 1184 /* do not copy values to GPU since they are all zero and not yet needed there */ 1185 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1186 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1187 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1188 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1189 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1190 } 1191 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J) 1196 { 1197 PetscErrorCode ierr; 1198 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1199 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1200 MPI_Comm 
comm; 1201 PetscScalar *values; 1202 DMBoundaryType bx,by; 1203 ISLocalToGlobalMapping ltog,mltog; 1204 DMDAStencilType st; 1205 PetscBool removedups = PETSC_FALSE; 1206 1207 PetscFunctionBegin; 1208 /* 1209 nc - number of components per grid point 1210 col - number of colors needed in one direction for single component problem 1211 1212 */ 1213 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 1214 col = 2*s + 1; 1215 /* 1216 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1217 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1218 */ 1219 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1220 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1221 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 1222 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 1223 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1224 1225 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1226 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1227 1228 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1229 /* determine the matrix preallocation information */ 1230 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1231 for (i=xs; i<xs+nx; i++) { 1232 1233 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1234 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1235 1236 for (j=ys; j<ys+ny; j++) { 1237 slot = i - gxs + gnx*(j - gys); 1238 1239 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1240 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1241 1242 cnt = 0; 1243 for (k=0; k<nc; k++) { 1244 for (l=lstart; l<lend+1; l++) { 1245 for (p=pstart; p<pend+1; p++) { 1246 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1247 cols[cnt++] = k + nc*(slot + gnx*l + p); 1248 } 1249 } 1250 } 1251 rows[k] = k + nc*(slot); 1252 } 1253 if (removedups) { 1254 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1255 } else { 1256 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1257 } 1258 } 1259 } 1260 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1261 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1262 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1263 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1264 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1265 if (!mltog) { 1266 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1267 } 1268 1269 /* 1270 For each node in the grid: we get the neighbors in the local (on processor ordering 1271 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1272 PETSc ordering. 1273 */ 1274 if (!da->prealloc_only) { 1275 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1276 for (i=xs; i<xs+nx; i++) { 1277 1278 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1279 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1280 1281 for (j=ys; j<ys+ny; j++) { 1282 slot = i - gxs + gnx*(j - gys); 1283 1284 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1285 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1286 1287 cnt = 0; 1288 for (k=0; k<nc; k++) { 1289 for (l=lstart; l<lend+1; l++) { 1290 for (p=pstart; p<pend+1; p++) { 1291 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1292 cols[cnt++] = k + nc*(slot + gnx*l + p); 1293 } 1294 } 1295 } 1296 rows[k] = k + nc*(slot); 1297 } 1298 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1299 } 1300 } 1301 ierr = PetscFree(values);CHKERRQ(ierr); 1302 /* do not copy values to GPU since they are all zero and not yet needed there */ 1303 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1304 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1305 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1306 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1307 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1308 } 1309 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1314 { 1315 PetscErrorCode ierr; 1316 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1317 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1318 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1319 DM_DA *dd = (DM_DA*)da->data; 1320 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1321 MPI_Comm comm; 1322 PetscScalar *values; 1323 DMBoundaryType bx,by; 1324 ISLocalToGlobalMapping ltog; 1325 DMDAStencilType st; 1326 PetscBool removedups = PETSC_FALSE; 1327 1328 PetscFunctionBegin; 1329 /* 1330 nc - number of components per grid point 1331 col - number of colors needed in one direction for single component problem 1332 1333 */ 1334 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr); 1335 col = 2*s + 1; 1336 /* 1337 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1338 because of "wrapping" around the end of the 
domain hitting an entry already counted in the other direction. 1339 */ 1340 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1341 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1342 ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr); 1343 ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr); 1344 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1345 1346 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1347 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1348 1349 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1350 /* determine the matrix preallocation information */ 1351 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1352 for (i=xs; i<xs+nx; i++) { 1353 1354 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1355 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1356 1357 for (j=ys; j<ys+ny; j++) { 1358 slot = i - gxs + gnx*(j - gys); 1359 1360 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1361 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1362 1363 for (k=0; k<nc; k++) { 1364 cnt = 0; 1365 for (l=lstart; l<lend+1; l++) { 1366 for (p=pstart; p<pend+1; p++) { 1367 if (l || p) { 1368 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1369 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1370 } 1371 } else { 1372 if (dfill) { 1373 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1374 } else { 1375 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1376 } 1377 } 1378 } 1379 } 1380 row = k + nc*(slot); 1381 maxcnt = PetscMax(maxcnt,cnt); 1382 if (removedups) { 1383 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1384 } else { 1385 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1386 } 1387 } 1388 } 1389 } 1390 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1391 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1392 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1393 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1394 1395 /* 1396 For each node in the grid: we get the neighbors in the local (on processor ordering 1397 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1398 PETSc ordering. 1399 */ 1400 if (!da->prealloc_only) { 1401 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 1402 for (i=xs; i<xs+nx; i++) { 1403 1404 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1405 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1406 1407 for (j=ys; j<ys+ny; j++) { 1408 slot = i - gxs + gnx*(j - gys); 1409 1410 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1411 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1412 1413 for (k=0; k<nc; k++) { 1414 cnt = 0; 1415 for (l=lstart; l<lend+1; l++) { 1416 for (p=pstart; p<pend+1; p++) { 1417 if (l || p) { 1418 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1419 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1420 } 1421 } else { 1422 if (dfill) { 1423 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1424 } else { 1425 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1426 } 1427 } 1428 } 1429 } 1430 row = k + nc*(slot); 1431 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1432 } 1433 } 1434 } 1435 ierr = PetscFree(values);CHKERRQ(ierr); 1436 /* do not copy values to GPU since they are all zero and not yet needed there */ 1437 ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1438 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1439 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1440 ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1441 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1442 } 1443 ierr = PetscFree(cols);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 /* ---------------------------------------------------------------------------------*/ 1448 1449 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J) 1450 { 1451 PetscErrorCode ierr; 1452 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1453 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1454 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1455 MPI_Comm comm; 1456 PetscScalar *values; 1457 DMBoundaryType bx,by,bz; 1458 ISLocalToGlobalMapping ltog,mltog; 1459 DMDAStencilType st; 1460 PetscBool removedups = PETSC_FALSE; 1461 1462 PetscFunctionBegin; 1463 /* 1464 nc - number of components 
per grid point 1465 col - number of colors needed in one direction for single component problem 1466 1467 */ 1468 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1469 col = 2*s + 1; 1470 1471 /* 1472 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1473 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1474 */ 1475 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1476 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1477 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1478 1479 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1480 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1481 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1482 1483 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1484 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1485 1486 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1487 /* determine the matrix preallocation information */ 1488 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1489 for (i=xs; i<xs+nx; i++) { 1490 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1491 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1492 for (j=ys; j<ys+ny; j++) { 1493 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1494 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1495 for (k=zs; k<zs+nz; k++) { 1496 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1497 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 1498 1499 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1500 1501 cnt = 0; 1502 for (l=0; l<nc; l++) { 1503 for (ii=istart; ii<iend+1; ii++) { 1504 for (jj=jstart; jj<jend+1; jj++) { 1505 for (kk=kstart; kk<kend+1; kk++) { 1506 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1507 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1508 } 1509 } 1510 } 1511 } 1512 rows[l] = l + nc*(slot); 1513 } 1514 if (removedups) { 1515 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1516 } else { 1517 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1518 } 1519 } 1520 } 1521 } 1522 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1523 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1525 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1526 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1527 if (!mltog) { 1528 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1529 } 1530 1531 /* 1532 For each node in the grid: we get the neighbors in the local (on processor ordering 1533 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1534 PETSc ordering. 1535 */ 1536 if (!da->prealloc_only) { 1537 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1538 for (i=xs; i<xs+nx; i++) { 1539 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1540 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1541 for (j=ys; j<ys+ny; j++) { 1542 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1543 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1544 for (k=zs; k<zs+nz; k++) { 1545 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1546 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
                                               s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  ierr = PetscFree(values);CHKERRQ(ierr);
  /* do not copy values to GPU since they are all zero and not yet needed there */
  ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates, and unless da->prealloc_only also
   inserts zeros into, an AIJ matrix for a 1d DMDA, honoring the per-block fill
   patterns dfill (diagonal block) and ofill (off-diagonal blocks).

   Counting is done directly per matrix row (no MatPreallocate macros): cols[] collects
   the on-diagonal-portion counts and ocols[] the off-process counts, with the first s
   and last s grid points of each rank special-cased for the neighbor/periodic coupling.

   NOTE(review): the counting phase reads dfill[j+1]-dfill[j] unconditionally while the
   insertion phase below guards with "if (dfill)" — so this routine appears to assume
   dfill was provided; confirm against the callers that select this path.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  PetscScalar            *values;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);

  /*
         nc - number of components per grid point

  */
  ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
  if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* cols[] = per-row diagonal-portion nonzero counts, ocols[] = off-process counts */
  ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      /* rank 0 has no left neighbor unless periodic (handled just below) */
      ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: extra entries are off-process unless a single rank owns everything */
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior grid points: full 2*s stencil entirely on-process */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
  ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is reused here as a scratch column-index buffer sized by the largest row */
    ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);

    row = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the far (right) end of the global domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        /* global indices here, hence MatSetValues (not MatSetValuesLocal) */
        ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    /* interior grid points */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the near (left) end of the global domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    ierr = PetscFree2(values,cols);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
 ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ - Preallocates, and unless da->prealloc_only also inserts
   zeros into, an AIJ matrix for a 1d DMDA, treating every dof as fully coupled (no
   dfill/ofill): each row gets up to (2*s+1)*nc entries.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  PetscScalar            *values;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* uniform per-row count col*nc, no exact per-row counting */
  ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  /* only install the mapping if the matrix does not already carry one */
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
    ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the ghosted local domain */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (l=0; l<nc; l++) {
        for (i1=istart; i1<iend+1; i1++) {
          cols[cnt++] = l + nc*(slot + i1);
        }
        rows[l] = l + nc*(slot);
      }
      ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIBAIJ - Preallocates, and unless da->prealloc_only also inserts
   zeros into, a BAIJ matrix (block size nc) for a 2d DMDA; one block row per grid point,
   exact per-row counts gathered via the MatPreallocate macros.

   NOTE: MatPreallocateInitialize/Finalize are macros that declare and free dnz/onz in
   this scope; do not reorder statements across them.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic: full stencil always; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);
        cnt    = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPIBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ: BAIJ
   matrix (block size nc), exact block-row counts via the MatPreallocate macros, star
   stencil entries selected by requiring at least two of ii,jj,kk to be zero.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* block columns only, hence col^3 (not col^3*nc^2) */
  ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
  identify in the local ordering with periodic domain.
*/
/*
   Maps a block row index and its block column indices from local to global numbering
   (in place), then compacts col[] to keep only entries on or above the diagonal
   (col[i] >= row), updating *cnt to the new length.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
{
  PetscErrorCode ierr;
  PetscInt       i,n;

  PetscFunctionBegin;
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
  for (i=0,n=0; i<*cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPISBAIJ - Preallocates, and unless da->prealloc_only also inserts
   zeros into, a symmetric SBAIJ matrix (block size nc) for a 2d DMDA. Only the upper
   triangle is stored: L2GFilterUpperTriangular() converts each block row to global
   numbering and drops below-diagonal columns, so insertion uses the global-index
   MatSetValuesBlocked() rather than the Local variant.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      /* slot and cols become global here; keep only the upper triangle */
      ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
      ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
        ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPISBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ:
   symmetric SBAIJ matrix (block size nc), upper triangle only via
   L2GFilterUpperTriangular().
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the matrix */
  ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
        ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
          ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates, and unless da->prealloc_only also
   inserts zeros into, an AIJ matrix for a 3d DMDA, honoring the dfill/ofill block fill
   patterns from DMDASetBlockFills(). One scalar row per (grid point, component l);
   off-diagonal stencil points contribute the ofill[l] columns, the center point the
   dfill[l] columns (or all nc if dfill is NULL).

   removedups works around single-rank skinny periodic domains where wrap-around makes
   the same column appear twice in the stencil.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  /* off-center stencil point: couple through ofill pattern */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  /* center point: couple through dfill pattern (dense block if NULL) */
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row    = l + nc*(slot);
          maxcnt = PetscMax(maxcnt,cnt);
          if (removedups) {
            ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
          } else {
            ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row  = l + nc*(slot);
            ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatPinToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatPinToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}