1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 17 { 18 PetscErrorCode ierr; 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 52 { 53 PetscErrorCode ierr; 54 PetscInt nz; 55 56 PetscFunctionBegin; 57 if (!dfillsparse) PetscFunctionReturn(0); 58 59 /* Determine number of non-zeros */ 60 nz = (dfillsparse[w] - w - 1); 61 62 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 63 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 64 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 69 { 70 PetscErrorCode ierr; 71 PetscInt i,k,cnt = 1; 72 73 PetscFunctionBegin; 74 75 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 76 columns to the left with any nonzeros in them plus 1 */ 77 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 78 for (i=0; i<dd->w; i++) { 79 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 80 } 81 for (i=0; i<dd->w; i++) { 82 if (dd->ofillcols[i]) { 83 dd->ofillcols[i] = cnt++; 84 } 85 } 86 PetscFunctionReturn(0); 87 } 88 89 /*@ 90 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 91 of the matrix returned by DMCreateMatrix(). 92 93 Logically Collective on da 94 95 Input Parameters: 96 + da - the distributed array 97 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 98 - ofill - the fill pattern in the off-diagonal blocks 99 100 Level: developer 101 102 Notes: 103 This only makes sense when you are doing multicomponent problems but using the 104 MPIAIJ matrix format 105 106 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 107 representing coupling and 0 entries for missing coupling. For example 108 $ dfill[9] = {1, 0, 0, 109 $ 1, 1, 0, 110 $ 0, 1, 1} 111 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 112 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 113 diagonal block). 
114 115 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 116 can be represented in the dfill, ofill format 117 118 Contributed by Glenn Hammond 119 120 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 121 122 @*/ 123 PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill) 124 { 125 DM_DA *dd = (DM_DA*)da->data; 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 /* save the given dfill and ofill information */ 130 ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr); 131 ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr); 132 133 /* count nonzeros in ofill columns */ 134 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 135 136 PetscFunctionReturn(0); 137 } 138 139 /*@ 140 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 141 of the matrix returned by DMCreateMatrix(), using sparse representations 142 of fill patterns. 143 144 Logically Collective on da 145 146 Input Parameters: 147 + da - the distributed array 148 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 149 - ofill - the sparse fill pattern in the off-diagonal blocks 150 151 Level: developer 152 153 Notes: This only makes sense when you are doing multicomponent problems but using the 154 MPIAIJ matrix format 155 156 The format for dfill and ofill is a sparse representation of a 157 dof-by-dof matrix with 1 entries representing coupling and 0 entries 158 for missing coupling. The sparse representation is a 1 dimensional 159 array of length nz + dof + 1, where nz is the number of non-zeros in 160 the matrix. The first dof entries in the array give the 161 starting array indices of each row's items in the rest of the array, 162 the dof+1st item contains the value nz + dof + 1 (i.e. 
the entire length of the array) 163 and the remaining nz items give the column indices of each of 164 the 1s within the logical 2D matrix. Each row's items within 165 the array are the column indices of the 1s within that row 166 of the 2D matrix. PETSc developers may recognize that this is the 167 same format as that computed by the DMDASetBlockFills_Private() 168 function from a dense 2D matrix representation. 169 170 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 171 can be represented in the dfill, ofill format 172 173 Contributed by Philip C. Roth 174 175 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly() 176 177 @*/ 178 PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse) 179 { 180 DM_DA *dd = (DM_DA*)da->data; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* save the given dfill and ofill information */ 185 ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr); 186 ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr); 187 188 /* count nonzeros in ofill columns */ 189 ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr); 190 191 PetscFunctionReturn(0); 192 } 193 194 PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring) 195 { 196 PetscErrorCode ierr; 197 PetscInt dim,m,n,p,nc; 198 DMBoundaryType bx,by,bz; 199 MPI_Comm comm; 200 PetscMPIInt size; 201 PetscBool isBAIJ; 202 DM_DA *dd = (DM_DA*)da->data; 203 204 PetscFunctionBegin; 205 /* 206 m 207 ------------------------------------------------------ 208 | | 209 | | 210 | ---------------------- | 211 | | | | 212 n | yn | | | 213 | | | | 214 | .--------------------- | 215 | (xs,ys) xn | 216 | . 
| 217 | (gxs,gys) | 218 | | 219 ----------------------------------------------------- 220 */ 221 222 /* 223 nc - number of components per grid point 224 col - number of colors needed in one direction for single component problem 225 226 */ 227 ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr); 228 229 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 230 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 231 if (ctype == IS_COLORING_LOCAL) { 232 if (size == 1) { 233 ctype = IS_COLORING_GLOBAL; 234 } else if (dim > 1) { 235 if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) { 236 SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 237 } 238 } 239 } 240 241 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 242 matrices is for the blocks, not the individual matrix elements */ 243 ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr); 244 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);} 245 if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);} 246 if (isBAIJ) { 247 dd->w = 1; 248 dd->xs = dd->xs/nc; 249 dd->xe = dd->xe/nc; 250 dd->Xs = dd->Xs/nc; 251 dd->Xe = dd->Xe/nc; 252 } 253 254 /* 255 We do not provide a getcoloring function in the DMDA operations because 256 the basic DMDA does not know about matrices. We think of DMDA as being 257 more low-level then matrices. 
258 */ 259 if (dim == 1) { 260 ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 261 } else if (dim == 2) { 262 ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 263 } else if (dim == 3) { 264 ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 265 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); 266 if (isBAIJ) { 267 dd->w = nc; 268 dd->xs = dd->xs*nc; 269 dd->xe = dd->xe*nc; 270 dd->Xs = dd->Xs*nc; 271 dd->Xe = dd->Xe*nc; 272 } 273 PetscFunctionReturn(0); 274 } 275 276 /* ---------------------------------------------------------------------------------*/ 277 278 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 279 { 280 PetscErrorCode ierr; 281 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col; 282 PetscInt ncolors; 283 MPI_Comm comm; 284 DMBoundaryType bx,by; 285 DMDAStencilType st; 286 ISColoringValue *colors; 287 DM_DA *dd = (DM_DA*)da->data; 288 289 PetscFunctionBegin; 290 /* 291 nc - number of components per grid point 292 col - number of colors needed in one direction for single component problem 293 294 */ 295 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 296 col = 2*s + 1; 297 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 298 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 299 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 300 301 /* special case as taught to us by Paul Hovland */ 302 if (st == DMDA_STENCIL_STAR && s == 1) { 303 ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr); 304 } else { 305 if (ctype == IS_COLORING_GLOBAL) { 306 if (!dd->localcoloring) { 307 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 308 ii = 0; 309 for (j=ys; j<ys+ny; j++) { 310 for (i=xs; i<xs+nx; i++) { 311 for (k=0; k<nc; 
k++) { 312 colors[ii++] = k + nc*((i % col) + col*(j % col)); 313 } 314 } 315 } 316 ncolors = nc + nc*(col-1 + col*(col-1)); 317 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 318 } 319 *coloring = dd->localcoloring; 320 } else if (ctype == IS_COLORING_LOCAL) { 321 if (!dd->ghostedcoloring) { 322 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 323 ii = 0; 324 for (j=gys; j<gys+gny; j++) { 325 for (i=gxs; i<gxs+gnx; i++) { 326 for (k=0; k<nc; k++) { 327 /* the complicated stuff is to handle periodic boundaries */ 328 colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col)); 329 } 330 } 331 } 332 ncolors = nc + nc*(col - 1 + col*(col-1)); 333 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 334 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 335 336 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 337 } 338 *coloring = dd->ghostedcoloring; 339 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 340 } 341 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* ---------------------------------------------------------------------------------*/ 346 347 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 348 { 349 PetscErrorCode ierr; 350 PetscInt xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P; 351 PetscInt ncolors; 352 MPI_Comm comm; 353 DMBoundaryType bx,by,bz; 354 DMDAStencilType st; 355 ISColoringValue *colors; 356 DM_DA *dd = (DM_DA*)da->data; 357 358 PetscFunctionBegin; 359 /* 360 nc - number of components per grid point 361 col - number of colors needed in one direction for single component problem 362 363 */ 364 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 365 col = 2*s + 1; 366 
ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 367 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 368 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 369 370 /* create the coloring */ 371 if (ctype == IS_COLORING_GLOBAL) { 372 if (!dd->localcoloring) { 373 ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr); 374 ii = 0; 375 for (k=zs; k<zs+nz; k++) { 376 for (j=ys; j<ys+ny; j++) { 377 for (i=xs; i<xs+nx; i++) { 378 for (l=0; l<nc; l++) { 379 colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col)); 380 } 381 } 382 } 383 } 384 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 385 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 386 } 387 *coloring = dd->localcoloring; 388 } else if (ctype == IS_COLORING_LOCAL) { 389 if (!dd->ghostedcoloring) { 390 ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr); 391 ii = 0; 392 for (k=gzs; k<gzs+gnz; k++) { 393 for (j=gys; j<gys+gny; j++) { 394 for (i=gxs; i<gxs+gnx; i++) { 395 for (l=0; l<nc; l++) { 396 /* the complicated stuff is to handle periodic boundaries */ 397 colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col)); 398 } 399 } 400 } 401 } 402 ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); 403 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 404 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 405 } 406 *coloring = dd->ghostedcoloring; 407 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 408 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 /* ---------------------------------------------------------------------------------*/ 413 414 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType 
ctype,ISColoring *coloring) 415 { 416 PetscErrorCode ierr; 417 PetscInt xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col; 418 PetscInt ncolors; 419 MPI_Comm comm; 420 DMBoundaryType bx; 421 ISColoringValue *colors; 422 DM_DA *dd = (DM_DA*)da->data; 423 424 PetscFunctionBegin; 425 /* 426 nc - number of components per grid point 427 col - number of colors needed in one direction for single component problem 428 429 */ 430 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 431 col = 2*s + 1; 432 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 433 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 434 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 435 436 /* create the coloring */ 437 if (ctype == IS_COLORING_GLOBAL) { 438 if (!dd->localcoloring) { 439 ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr); 440 if (dd->ofillcols) { 441 PetscInt tc = 0; 442 for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0); 443 i1 = 0; 444 for (i=xs; i<xs+nx; i++) { 445 for (l=0; l<nc; l++) { 446 if (dd->ofillcols[l] && (i % col)) { 447 colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; 448 } else { 449 colors[i1++] = l; 450 } 451 } 452 } 453 ncolors = nc + 2*s*tc; 454 } else { 455 i1 = 0; 456 for (i=xs; i<xs+nx; i++) { 457 for (l=0; l<nc; l++) { 458 colors[i1++] = l + nc*(i % col); 459 } 460 } 461 ncolors = nc + nc*(col-1); 462 } 463 ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 464 } 465 *coloring = dd->localcoloring; 466 } else if (ctype == IS_COLORING_LOCAL) { 467 if (!dd->ghostedcoloring) { 468 ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr); 469 i1 = 0; 470 for (i=gxs; i<gxs+gnx; i++) { 471 for (l=0; l<nc; l++) { 472 /* the complicated stuff is to handle periodic boundaries */ 473 colors[i1++] = l + nc*(SetInRange(i,m) % col); 474 } 475 } 476 ncolors = nc + nc*(col-1); 477 ierr = 
ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 478 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 479 } 480 *coloring = dd->ghostedcoloring; 481 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 482 ierr = ISColoringReference(*coloring);CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring) 487 { 488 PetscErrorCode ierr; 489 PetscInt xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc; 490 PetscInt ncolors; 491 MPI_Comm comm; 492 DMBoundaryType bx,by; 493 ISColoringValue *colors; 494 DM_DA *dd = (DM_DA*)da->data; 495 496 PetscFunctionBegin; 497 /* 498 nc - number of components per grid point 499 col - number of colors needed in one direction for single component problem 500 501 */ 502 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr); 503 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 504 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 505 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 506 /* create the coloring */ 507 if (ctype == IS_COLORING_GLOBAL) { 508 if (!dd->localcoloring) { 509 ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr); 510 ii = 0; 511 for (j=ys; j<ys+ny; j++) { 512 for (i=xs; i<xs+nx; i++) { 513 for (k=0; k<nc; k++) { 514 colors[ii++] = k + nc*((3*j+i) % 5); 515 } 516 } 517 } 518 ncolors = 5*nc; 519 ierr = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr); 520 } 521 *coloring = dd->localcoloring; 522 } else if (ctype == IS_COLORING_LOCAL) { 523 if (!dd->ghostedcoloring) { 524 ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr); 525 ii = 0; 526 for (j=gys; j<gys+gny; j++) { 527 for (i=gxs; i<gxs+gnx; i++) { 528 for (k=0; k<nc; k++) { 529 colors[ii++] = k 
+ nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); 530 } 531 } 532 } 533 ncolors = 5*nc; 534 ierr = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr); 535 ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr); 536 } 537 *coloring = dd->ghostedcoloring; 538 } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype); 539 PetscFunctionReturn(0); 540 } 541 542 /* =========================================================================== */ 543 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool); 544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 545 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool); 546 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool); 547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 548 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool); 549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 550 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 551 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 552 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 553 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 554 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 555 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 556 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 557 558 /*@C 559 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 560 561 Logically Collective on mat 562 563 Input Parameters: 564 + mat - the matrix 565 - da - the da 566 567 Level: intermediate 568 569 @*/ 570 PetscErrorCode MatSetupDM(Mat mat,DM da) 571 { 572 PetscErrorCode ierr; 573 574 PetscFunctionBegin; 575 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 576 PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA); 577 ierr = 
PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 582 { 583 DM da; 584 PetscErrorCode ierr; 585 const char *prefix; 586 Mat Anatural; 587 AO ao; 588 PetscInt rstart,rend,*petsc,i; 589 IS is; 590 MPI_Comm comm; 591 PetscViewerFormat format; 592 593 PetscFunctionBegin; 594 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 597 598 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 599 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 600 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 601 602 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 603 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 604 ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr); 605 for (i=rstart; i<rend; i++) petsc[i-rstart] = i; 606 ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr); 607 ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 608 609 /* call viewer on natural ordering */ 610 ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr); 611 ierr = ISDestroy(&is);CHKERRQ(ierr); 612 ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr); 613 ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr); 614 ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr); 615 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 616 ierr = MatView(Anatural,viewer);CHKERRQ(ierr); 617 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 618 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 619 
PetscFunctionReturn(0); 620 } 621 622 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 623 { 624 DM da; 625 PetscErrorCode ierr; 626 Mat Anatural,Aapp; 627 AO ao; 628 PetscInt rstart,rend,*app,i,m,n,M,N; 629 IS is; 630 MPI_Comm comm; 631 632 PetscFunctionBegin; 633 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 634 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 635 if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 636 637 /* Load the matrix in natural ordering */ 638 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr); 639 ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr); 640 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 641 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 642 ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr); 643 ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr); 644 645 /* Map natural ordering to application ordering and create IS */ 646 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 647 ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr); 648 ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr); 649 for (i=rstart; i<rend; i++) app[i-rstart] = i; 650 ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr); 651 ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 652 653 /* Do permutation and replace header */ 654 ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr); 655 ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr); 656 ierr = ISDestroy(&is);CHKERRQ(ierr); 657 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 662 { 663 PetscErrorCode ierr; 664 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 665 Mat A; 666 MPI_Comm comm; 667 MatType Atype; 668 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 669 MatType mtype; 670 PetscMPIInt size; 671 DM_DA 
*dd = (DM_DA*)da->data; 672 673 PetscFunctionBegin; 674 ierr = MatInitializePackage();CHKERRQ(ierr); 675 mtype = da->mattype; 676 677 /* 678 m 679 ------------------------------------------------------ 680 | | 681 | | 682 | ---------------------- | 683 | | | | 684 n | ny | | | 685 | | | | 686 | .--------------------- | 687 | (xs,ys) nx | 688 | . | 689 | (gxs,gys) | 690 | | 691 ----------------------------------------------------- 692 */ 693 694 /* 695 nc - number of components per grid point 696 col - number of colors needed in one direction for single component problem 697 698 */ 699 M = dd->M; 700 N = dd->N; 701 P = dd->P; 702 dim = da->dim; 703 dof = dd->w; 704 /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */ 705 ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr); 706 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 707 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 708 ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr); 709 ierr = MatSetType(A,mtype);CHKERRQ(ierr); 710 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 711 if (dof*nx*ny*nz < da->bind_below) { 712 ierr = MatSetBindingPropagates(A,PETSC_TRUE);CHKERRQ(ierr); 713 ierr = MatBindToCPU(A,PETSC_TRUE);CHKERRQ(ierr); 714 } 715 ierr = MatSetDM(A,da);CHKERRQ(ierr); 716 if (da->structure_only) { 717 ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr); 718 } 719 ierr = MatGetType(A,&Atype);CHKERRQ(ierr); 720 /* 721 We do not provide a getmatrix function in the DMDA operations because 722 the basic DMDA does not know about matrices. We think of DMDA as being more 723 more low-level than matrices. This is kind of cheating but, cause sometimes 724 we think of DMDA has higher level than matrices. 725 726 We could switch based on Atype (or mtype), but we do not since the 727 specialized setting routines depend only on the particular preallocation 728 details of the matrix, not the type itself. 
729 */ 730 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 731 if (!aij) { 732 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr); 733 } 734 if (!aij) { 735 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 736 if (!baij) { 737 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr); 738 } 739 if (!baij) { 740 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 741 if (!sbaij) { 742 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr); 743 } 744 if (!sbaij) { 745 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr); 746 if (!sell) { 747 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr); 748 } 749 } 750 if (!sell) { 751 ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr); 752 } 753 } 754 } 755 if (aij) { 756 if (dim == 1) { 757 if (dd->ofill) { 758 ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 759 } else { 760 DMBoundaryType bx; 761 PetscMPIInt size; 762 ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 763 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr); 764 if (size == 1 && bx == DM_BOUNDARY_NONE) { 765 ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr); 766 } else { 767 ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 768 } 769 } 770 } else if (dim == 2) { 771 if (dd->ofill) { 772 ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 773 } else { 774 ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 775 } 776 } else if (dim == 3) { 777 if (dd->ofill) { 778 ierr = 
DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr); 779 } else { 780 ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr); 781 } 782 } 783 } else if (baij) { 784 if (dim == 2) { 785 ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr); 786 } else if (dim == 3) { 787 ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr); 788 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 789 } else if (sbaij) { 790 if (dim == 2) { 791 ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr); 792 } else if (dim == 3) { 793 ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr); 794 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 795 } else if (sell) { 796 if (dim == 2) { 797 ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr); 798 } else if (dim == 3) { 799 ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr); 800 } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! 
Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
  } else {
    ISLocalToGlobalMapping ltog;

    /* generic fallback: no type-specific preallocation; just set up the matrix and
       attach the DMDA's local-to-global map so MatSetValuesLocal() works */
    ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
    ierr = MatSetUp(A);CHKERRQ(ierr);
    ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
    ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
  }
  /* record the grid shape (ghost-region starts/dims) on the matrix so stencil-based
     MatSetValuesStencil() calls can be translated, and remember the owning DM */
  ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
  ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
    ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

/*
   DMCreateMatrix_DA_IS - Builds a MATIS matrix for a DMDA: constructs the filtered
   local-to-global maps for both the global matrix J and its local matrix, then reuses
   the assembled-format (MPIAIJ) routines to compute a (not exact) preallocation.

   Input Parameters:
+  dm - the DMDA
-  J  - the MATIS matrix to configure
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ;
  ISLocalToGlobalMapping ltog;
  IS                     is_loc_filt, is_glob;
  const PetscInt         *e_loc,*idx;
  PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
  PetscBool              flg;
  PetscErrorCode         ierr;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter the local indices that are represented through the DMDAGetElements decomposition
     This is because the size of the local matrices in MATIS is the local size of the l2g map */
  PetscFunctionBegin;
  dof  = da->w;
  dim  = dm->dim;

  ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);

  /* get local elements indices in local DMDA numbering */
  ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr);
  ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);

  /* obtain a consistent local ordering for MATIS */
  ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr);
  ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
  /* gidx is reused below for the global-to-local pass, hence the PetscMax sizing */
  ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr);
  ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr);
  ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr);
  ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingCreateIS(is_glob,&ltog);CHKERRQ(ierr);
  ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);

  /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
  ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
  ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr);
  ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr);
  /* IS_GTOLM_MASK maps indices not present in the filtered set to -1 */
  ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr);
  ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr);
  ierr = ISDestroy(&is_glob);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);CHKERRQ(ierr);
  ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
  ierr = PetscFree(gidx);CHKERRQ(ierr);

  /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */
  flg = dm->prealloc_only;
  dm->prealloc_only = PETSC_TRUE;
  switch (dim) {
  case 1:
    /* temporarily redirect MatMPIAIJSetPreallocation so the AIJ routine preallocates the MATIS */
    ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
    ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
    break;
  case 2:
    ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
    ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
    break;
  case 3:
    ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr);
    ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
  }
  dm->prealloc_only = flg;
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPISELL - Preallocates and (optionally) zero-fills a SELL-format
   matrix for a 2d DMDA using the stencil width/type to determine the nonzero pattern.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* cols must hold the widest possible stencil row block: col*col grid neighbors times nc*nc couplings */
  ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {

    /* clip the stencil at physical boundaries unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);   /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is locked in before assembly */
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPISELL - 3d analogue of DMCreateMatrix_DA_2d_MPISELL:
   preallocates (and optionally zero-fills) a SELL-format matrix for a 3d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr =
PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at physical boundaries unless periodic in that direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);  /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): sized col^3*nc^3 here, while the 2d routine uses col^2*nc^2; nc^2 would
       appear to suffice for MatSetValuesLocal(nc rows x cnt cols) — looks over-allocated, confirm */
    ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (l=0; l<nc; l++) {
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
            rows[l] = l + nc*(slot);
          }
          ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ - Preallocates and (optionally) pattern-fills an AIJ
   matrix for a 2d DMDA; isIS indicates the caller is building a MATIS (see DMCreateMatrix_DA_IS).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt
lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
    ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
  }
  col  = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {

    /* clip the stencil at physical boundaries unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);   /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      } else {
        ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  /* only install the map if the matrix does not already carry one (e.g. MATIS case) */
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* emit the nc consecutive component columns for this grid neighbor */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);}
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    /* NOTE(review): the matching set above is guarded by !isIS but this reset is not;
       the 3d routine guards both — confirm whether the asymmetry is intentional */
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors the
   per-block fill patterns set with DMDASetBlockFills() (dd->dfill / dd->ofill) so only
   coupled components are preallocated/inserted.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr =
DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* one component row at a time, so at most col*col grid neighbors times nc columns each */
  ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {

    /* clip the stencil at physical boundaries unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);   /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      for (k=0; k<nc; k++) {
        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if (l || p) {
              /* off-diagonal grid block: use the ofill sparsity rows (CSR-style, see DMDASetBlockFills) */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
              }
            } else {
              /* diagonal grid block: dfill pattern if given, otherwise dense coupling */
              if (dfill) {
                for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
              } else {
                for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
              }
            }
          }
        }
        row    = k + nc*(slot);
        maxcnt = PetscMax(maxcnt,cnt);
        if (removedups) {
          ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        } else {
          ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        for (k=0; k<nc; k++) {
          cnt = 0;
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
                } else {
                  for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
                }
              }
            }
          }
          row  = k + nc*(slot);
          ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_3d_MPIAIJ - Preallocates and (optionally) pattern-fills an AIJ
   matrix for a 3d DMDA; isIS indicates the caller is building a MATIS (see DMCreateMatrix_DA_IS).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in
one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
  }
  col = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at physical boundaries unless periodic in that direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);  /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        } else {
          ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  /* only install the map if the matrix does not already carry one (e.g. MATIS case) */
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          /* kk-outer / ii-inner ordering generates the columns in increasing order */
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  /* emit the nc consecutive component columns for this grid neighbor */
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  for (l=1; l<nc; l++) {
                    cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ_Fill - 1d AIJ creation honoring the DMDASetBlockFills()
   sparsity (dd->dfill/dd->ofill); only supports stencil width s <= 1 (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
  ierr =
MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);

  /*
         nc - number of components per grid point
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* cols/ocols double here as per-row diagonal/off-diagonal nonzero counts for preallocation */
  ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      /* NOTE(review): dfill is dereferenced unconditionally here while the assembly phase
         below guards with if (dfill); confirm callers guarantee dfill != NULL */
      cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full left+right stencil, all on-process */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
  ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is re-malloc'ed here to hold global column indices for one row at a time */
    ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
    row  = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the far end of the periodic domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    /* interior rows */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the start of the periodic domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
1695 1696 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1697 { 1698 PetscErrorCode ierr; 1699 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1700 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1701 PetscInt istart,iend; 1702 DMBoundaryType bx; 1703 ISLocalToGlobalMapping ltog,mltog; 1704 1705 PetscFunctionBegin; 1706 /* 1707 nc - number of components per grid point 1708 col - number of colors needed in one direction for single component problem 1709 1710 */ 1711 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1712 if (!isIS && bx == DM_BOUNDARY_NONE) { 1713 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1714 } 1715 col = 2*s + 1; 1716 1717 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1718 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1719 1720 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1721 ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr); 1722 ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr); 1723 1724 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1725 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1726 if (!mltog) { 1727 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1728 } 1729 1730 /* 1731 For each node in the grid: we get the neighbors in the local (on processor ordering 1732 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1733 PETSc ordering. 
1734 */ 1735 if (!da->prealloc_only) { 1736 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1737 for (i=xs; i<xs+nx; i++) { 1738 istart = PetscMax(-s,gxs - i); 1739 iend = PetscMin(s,gxs + gnx - i - 1); 1740 slot = i - gxs; 1741 1742 cnt = 0; 1743 for (i1=istart; i1<iend+1; i1++) { 1744 cols[cnt++] = nc*(slot + i1); 1745 for (l=1; l<nc; l++) { 1746 cols[cnt] = 1 + cols[cnt-1];cnt++; 1747 } 1748 } 1749 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1750 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1751 } 1752 /* do not copy values to GPU since they are all zero and not yet needed there */ 1753 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1754 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1755 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1756 if (!isIS && bx == DM_BOUNDARY_NONE) { 1757 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1758 } 1759 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1760 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1761 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1762 } 1763 PetscFunctionReturn(0); 1764 } 1765 1766 /* ---------------------------------------------------------------------------------*/ 1767 1768 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS) 1769 { 1770 PetscErrorCode ierr; 1771 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1772 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1773 PetscInt istart,iend; 1774 DMBoundaryType bx; 1775 ISLocalToGlobalMapping ltog,mltog; 1776 1777 PetscFunctionBegin; 1778 /* 1779 nc - number of components per grid point 1780 col - number of colors needed in one direction for single component problem 1781 */ 1782 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1783 col = 2*s + 1; 1784 1785 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1786 
ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1787 1788 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1789 ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr); 1790 1791 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1792 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1793 if (!mltog) { 1794 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1795 } 1796 1797 /* 1798 For each node in the grid: we get the neighbors in the local (on processor ordering 1799 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1800 PETSc ordering. 1801 */ 1802 if (!da->prealloc_only) { 1803 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1804 for (i=xs; i<xs+nx; i++) { 1805 istart = PetscMax(-s,gxs - i); 1806 iend = PetscMin(s,gxs + gnx - i - 1); 1807 slot = i - gxs; 1808 1809 cnt = 0; 1810 for (i1=istart; i1<iend+1; i1++) { 1811 cols[cnt++] = nc*(slot + i1); 1812 for (l=1; l<nc; l++) { 1813 cols[cnt] = 1 + cols[cnt-1];cnt++; 1814 } 1815 } 1816 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1817 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1818 } 1819 /* do not copy values to GPU since they are all zero and not yet needed there */ 1820 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1821 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1822 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1823 if (!isIS && bx == DM_BOUNDARY_NONE) { 1824 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1825 } 1826 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1827 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1828 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1829 } 1830 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1835 { 1836 
PetscErrorCode ierr; 1837 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1838 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1839 PetscInt istart,iend,jstart,jend,ii,jj; 1840 MPI_Comm comm; 1841 PetscScalar *values; 1842 DMBoundaryType bx,by; 1843 DMDAStencilType st; 1844 ISLocalToGlobalMapping ltog; 1845 1846 PetscFunctionBegin; 1847 /* 1848 nc - number of components per grid point 1849 col - number of colors needed in one direction for single component problem 1850 */ 1851 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1852 col = 2*s + 1; 1853 1854 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1855 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1856 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1857 1858 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 1859 1860 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1861 1862 /* determine the matrix preallocation information */ 1863 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 1864 for (i=xs; i<xs+nx; i++) { 1865 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1866 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1867 for (j=ys; j<ys+ny; j++) { 1868 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1869 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1870 slot = i - gxs + gnx*(j - gys); 1871 1872 /* Find block columns in block row */ 1873 cnt = 0; 1874 for (ii=istart; ii<iend+1; ii++) { 1875 for (jj=jstart; jj<jend+1; jj++) { 1876 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1877 cols[cnt++] = slot + ii + gnx*jj; 1878 } 1879 } 1880 } 1881 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1882 } 1883 } 1884 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1885 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1886 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1887 1888 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1889 1890 /* 1891 For each node in the grid: we get the neighbors in the local (on processor ordering 1892 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1893 PETSc ordering. 1894 */ 1895 if (!da->prealloc_only) { 1896 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1897 for (i=xs; i<xs+nx; i++) { 1898 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1899 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1900 for (j=ys; j<ys+ny; j++) { 1901 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1902 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1903 slot = i - gxs + gnx*(j - gys); 1904 cnt = 0; 1905 for (ii=istart; ii<iend+1; ii++) { 1906 for (jj=jstart; jj<jend+1; jj++) { 1907 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1908 cols[cnt++] = slot + ii + gnx*jj; 1909 } 1910 } 1911 } 1912 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1913 } 1914 } 1915 ierr = PetscFree(values);CHKERRQ(ierr); 1916 /* do not copy values to GPU since they are all zero and not yet needed there */ 1917 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1918 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1919 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1920 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1921 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1922 } 1923 ierr = PetscFree(cols);CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1928 { 1929 PetscErrorCode ierr; 1930 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1931 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1932 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1933 MPI_Comm comm; 1934 PetscScalar *values; 1935 DMBoundaryType bx,by,bz; 1936 DMDAStencilType st; 1937 ISLocalToGlobalMapping ltog; 1938 1939 PetscFunctionBegin; 1940 /* 1941 nc - number of components per grid point 1942 col - number of colors needed in one direction for single component problem 1943 1944 */ 1945 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1946 col = 2*s + 1; 1947 1948 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1949 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1950 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1951 1952 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 1953 1954 ierr = 
DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1955 1956 /* determine the matrix preallocation information */ 1957 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1958 for (i=xs; i<xs+nx; i++) { 1959 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1960 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1961 for (j=ys; j<ys+ny; j++) { 1962 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1963 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1964 for (k=zs; k<zs+nz; k++) { 1965 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1966 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1967 1968 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1969 1970 /* Find block columns in block row */ 1971 cnt = 0; 1972 for (ii=istart; ii<iend+1; ii++) { 1973 for (jj=jstart; jj<jend+1; jj++) { 1974 for (kk=kstart; kk<kend+1; kk++) { 1975 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1976 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1977 } 1978 } 1979 } 1980 } 1981 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1982 } 1983 } 1984 } 1985 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1986 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1987 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1988 1989 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1990 1991 /* 1992 For each node in the grid: we get the neighbors in the local (on processor ordering 1993 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1994 PETSc ordering. 1995 */ 1996 if (!da->prealloc_only) { 1997 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 1998 for (i=xs; i<xs+nx; i++) { 1999 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2000 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,m-i-1)); 2001 for (j=ys; j<ys+ny; j++) { 2002 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2003 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2004 for (k=zs; k<zs+nz; k++) { 2005 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2006 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2007 2008 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2009 2010 cnt = 0; 2011 for (ii=istart; ii<iend+1; ii++) { 2012 for (jj=jstart; jj<jend+1; jj++) { 2013 for (kk=kstart; kk<kend+1; kk++) { 2014 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2015 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2016 } 2017 } 2018 } 2019 } 2020 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2021 } 2022 } 2023 } 2024 ierr = PetscFree(values);CHKERRQ(ierr); 2025 /* do not copy values to GPU since they are all zero and not yet needed there */ 2026 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2027 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2028 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2029 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2030 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2031 } 2032 ierr = PetscFree(cols);CHKERRQ(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 /* 2037 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2038 identify in the local ordering with periodic domain. 
2039 */ 2040 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2041 { 2042 PetscErrorCode ierr; 2043 PetscInt i,n; 2044 2045 PetscFunctionBegin; 2046 ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr); 2047 ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr); 2048 for (i=0,n=0; i<*cnt; i++) { 2049 if (col[i] >= *row) col[n++] = col[i]; 2050 } 2051 *cnt = n; 2052 PetscFunctionReturn(0); 2053 } 2054 2055 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2056 { 2057 PetscErrorCode ierr; 2058 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2059 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2060 PetscInt istart,iend,jstart,jend,ii,jj; 2061 MPI_Comm comm; 2062 PetscScalar *values; 2063 DMBoundaryType bx,by; 2064 DMDAStencilType st; 2065 ISLocalToGlobalMapping ltog; 2066 2067 PetscFunctionBegin; 2068 /* 2069 nc - number of components per grid point 2070 col - number of colors needed in one direction for single component problem 2071 */ 2072 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 2073 col = 2*s + 1; 2074 2075 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 2076 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 2077 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2078 2079 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 2080 2081 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2082 2083 /* determine the matrix preallocation information */ 2084 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2085 for (i=xs; i<xs+nx; i++) { 2086 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2087 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2088 for (j=ys; j<ys+ny; j++) { 2089 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2090 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2091 slot = i - gxs + gnx*(j - gys); 2092 2093 /* Find block columns in block row */ 2094 cnt = 0; 2095 for (ii=istart; ii<iend+1; ii++) { 2096 for (jj=jstart; jj<jend+1; jj++) { 2097 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2098 cols[cnt++] = slot + ii + gnx*jj; 2099 } 2100 } 2101 } 2102 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2103 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2104 } 2105 } 2106 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2107 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2108 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2109 2110 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2111 2112 /* 2113 For each node in the grid: we get the neighbors in the local (on processor ordering 2114 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2115 PETSc ordering. 2116 */ 2117 if (!da->prealloc_only) { 2118 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 2119 for (i=xs; i<xs+nx; i++) { 2120 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2121 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2122 for (j=ys; j<ys+ny; j++) { 2123 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2124 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2125 slot = i - gxs + gnx*(j - gys); 2126 2127 /* Find block columns in block row */ 2128 cnt = 0; 2129 for (ii=istart; ii<iend+1; ii++) { 2130 for (jj=jstart; jj<jend+1; jj++) { 2131 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2132 cols[cnt++] = slot + ii + gnx*jj; 2133 } 2134 } 2135 } 2136 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2137 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2138 } 2139 } 2140 ierr = PetscFree(values);CHKERRQ(ierr); 2141 /* do not copy values to GPU since they are all zero and not yet needed there */ 2142 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2143 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2144 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2145 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2146 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2147 } 2148 ierr = PetscFree(cols);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2153 { 2154 PetscErrorCode ierr; 2155 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2156 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2157 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2158 MPI_Comm comm; 2159 PetscScalar *values; 2160 DMBoundaryType bx,by,bz; 2161 DMDAStencilType st; 2162 ISLocalToGlobalMapping ltog; 2163 2164 PetscFunctionBegin; 2165 /* 2166 nc - number of components per grid point 2167 col - number of colors needed in one direction for single component problem 2168 */ 2169 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2170 col = 2*s + 1; 2171 2172 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2173 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2174 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2175 2176 /* create the matrix */ 2177 
ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 2178 2179 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2180 2181 /* determine the matrix preallocation information */ 2182 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2183 for (i=xs; i<xs+nx; i++) { 2184 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2185 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2186 for (j=ys; j<ys+ny; j++) { 2187 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2188 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2189 for (k=zs; k<zs+nz; k++) { 2190 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2191 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2192 2193 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2194 2195 /* Find block columns in block row */ 2196 cnt = 0; 2197 for (ii=istart; ii<iend+1; ii++) { 2198 for (jj=jstart; jj<jend+1; jj++) { 2199 for (kk=kstart; kk<kend+1; kk++) { 2200 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2201 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2202 } 2203 } 2204 } 2205 } 2206 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2207 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2208 } 2209 } 2210 } 2211 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2212 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2213 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2214 2215 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2216 2217 /* 2218 For each node in the grid: we get the neighbors in the local (on processor ordering 2219 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2220 PETSc ordering. 
2221 */ 2222 if (!da->prealloc_only) { 2223 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2224 for (i=xs; i<xs+nx; i++) { 2225 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2226 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2227 for (j=ys; j<ys+ny; j++) { 2228 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2229 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2230 for (k=zs; k<zs+nz; k++) { 2231 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2232 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2233 2234 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2235 2236 cnt = 0; 2237 for (ii=istart; ii<iend+1; ii++) { 2238 for (jj=jstart; jj<jend+1; jj++) { 2239 for (kk=kstart; kk<kend+1; kk++) { 2240 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2241 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2242 } 2243 } 2244 } 2245 } 2246 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2247 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2248 } 2249 } 2250 } 2251 ierr = PetscFree(values);CHKERRQ(ierr); 2252 /* do not copy values to GPU since they are all zero and not yet needed there */ 2253 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2254 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2255 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2256 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2257 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2258 } 2259 ierr = PetscFree(cols);CHKERRQ(ierr); 2260 PetscFunctionReturn(0); 2261 } 2262 2263 /* ---------------------------------------------------------------------------------*/ 2264 2265 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2266 { 2267 PetscErrorCode ierr; 2268 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2269 PetscInt 
m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2270 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2271 DM_DA *dd = (DM_DA*)da->data; 2272 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2273 MPI_Comm comm; 2274 PetscScalar *values; 2275 DMBoundaryType bx,by,bz; 2276 ISLocalToGlobalMapping ltog; 2277 DMDAStencilType st; 2278 PetscBool removedups = PETSC_FALSE; 2279 2280 PetscFunctionBegin; 2281 /* 2282 nc - number of components per grid point 2283 col - number of colors needed in one direction for single component problem 2284 2285 */ 2286 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2287 col = 2*s + 1; 2288 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2289 by 2*stencil_width + 1\n"); 2290 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2291 by 2*stencil_width + 1\n"); 2292 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2293 by 2*stencil_width + 1\n"); 2294 2295 /* 2296 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2297 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 
2298 */ 2299 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2300 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2301 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2302 2303 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2304 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2305 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2306 2307 ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr); 2308 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2309 2310 /* determine the matrix preallocation information */ 2311 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2312 2313 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 2314 for (i=xs; i<xs+nx; i++) { 2315 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2316 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2317 for (j=ys; j<ys+ny; j++) { 2318 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2319 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2320 for (k=zs; k<zs+nz; k++) { 2321 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2322 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 2323 2324 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2325 2326 for (l=0; l<nc; l++) { 2327 cnt = 0; 2328 for (ii=istart; ii<iend+1; ii++) { 2329 for (jj=jstart; jj<jend+1; jj++) { 2330 for (kk=kstart; kk<kend+1; kk++) { 2331 if (ii || jj || kk) { 2332 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2333 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2334 } 2335 } else { 2336 if (dfill) { 2337 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2338 } else { 2339 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2340 } 2341 } 2342 } 2343 } 2344 } 2345 row = l + nc*(slot); 2346 maxcnt = PetscMax(maxcnt,cnt); 2347 if (removedups) { 2348 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2349 } else { 2350 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2351 } 2352 } 2353 } 2354 } 2355 } 2356 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 2357 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 2358 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2359 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2360 2361 /* 2362 For each node in the grid: we get the neighbors in the local (on processor ordering 2363 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2364 PETSc ordering. 2365 */ 2366 if (!da->prealloc_only) { 2367 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 2368 for (i=xs; i<xs+nx; i++) { 2369 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2370 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2371 for (j=ys; j<ys+ny; j++) { 2372 jstart = (by == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s,-j)); 2373 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2374 for (k=zs; k<zs+nz; k++) { 2375 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2376 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2377 2378 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2379 2380 for (l=0; l<nc; l++) { 2381 cnt = 0; 2382 for (ii=istart; ii<iend+1; ii++) { 2383 for (jj=jstart; jj<jend+1; jj++) { 2384 for (kk=kstart; kk<kend+1; kk++) { 2385 if (ii || jj || kk) { 2386 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2387 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2388 } 2389 } else { 2390 if (dfill) { 2391 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2392 } else { 2393 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2394 } 2395 } 2396 } 2397 } 2398 } 2399 row = l + nc*(slot); 2400 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2401 } 2402 } 2403 } 2404 } 2405 ierr = PetscFree(values);CHKERRQ(ierr); 2406 /* do not copy values to GPU since they are all zero and not yet needed there */ 2407 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2408 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2409 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2410 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2411 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2412 } 2413 ierr = PetscFree(cols);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416