1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 17 { 18 PetscErrorCode ierr; 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 52 { 53 PetscErrorCode ierr; 54 PetscInt nz; 55 56 PetscFunctionBegin; 57 if (!dfillsparse) PetscFunctionReturn(0); 58 59 /* Determine number of non-zeros */ 60 nz = (dfillsparse[w] - w - 1); 61 62 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 63 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 64 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 69 { 70 PetscErrorCode ierr; 71 PetscInt i,k,cnt = 1; 72 73 PetscFunctionBegin; 74 75 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 76 columns to the left with any nonzeros in them plus 1 */ 77 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 78 for (i=0; i<dd->w; i++) { 79 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 80 } 81 for (i=0; i<dd->w; i++) { 82 if (dd->ofillcols[i]) { 83 dd->ofillcols[i] = cnt++; 84 } 85 } 86 PetscFunctionReturn(0); 87 } 88 89 /*@ 90 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 91 of the matrix returned by DMCreateMatrix(). 92 93 Logically Collective on da 94 95 Input Parameters: 96 + da - the distributed array 97 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 98 - ofill - the fill pattern in the off-diagonal blocks 99 100 Level: developer 101 102 Notes: 103 This only makes sense when you are doing multicomponent problems but using the 104 MPIAIJ matrix format 105 106 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 107 representing coupling and 0 entries for missing coupling. For example 108 $ dfill[9] = {1, 0, 0, 109 $ 1, 1, 0, 110 $ 0, 1, 1} 111 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 112 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 113 diagonal block). 
    DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
    can be represented in the dfill, ofill format

    Contributed by Glenn Hammond

.seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save (compressed copies of) the given dfill and ofill information */
  ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

           The format for dfill and ofill is a sparse representation of a
       dof-by-dof matrix with 1 entries representing coupling and 0 entries
       for missing coupling. The sparse representation is a 1 dimensional
       array of length nz + dof + 1, where nz is the number of non-zeros in
       the matrix. The first dof entries in the array give the
       starting array indices of each row's items in the rest of the array,
       the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
       and the remaining nz items give the column indices of each of
       the 1s within the logical 2D matrix. Each row's items within
       the array are the column indices of the 1s within that row
       of the 2D matrix. PETSc developers may recognize that this is the
       same format as that computed by the DMDASetBlockFills_Private()
       function from a dense 2D matrix representation.

    DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
    can be represented in the dfill, ofill format

    Contributed by Philip C. Roth

.seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save copies of the given sparse dfill and ofill information */
  ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Creates a coloring for a DMDA matrix by dispatching to the dimension-specific helpers below */
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode ierr;
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                                                        m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);

  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* with one process the ghosted and global orderings coincide */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction of extent 1 puts both "ends" of the domain on one process,
         which the local (ghosted) coloring cannot represent */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements; the rescaled
     fields are restored after the dispatch below */
  ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
  if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) {
    ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else if (dim == 2) {
    ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else if (dim == 3) {
    ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* restore the per-point dof and the ranges that were rescaled above for the BAIJ case */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 2d coloring: colors each point by (i mod col, j mod col, component), where
   col = 2*s+1, so any two points within the stencil width receive distinct colors */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      /* colorings are cached on the DM; compute only on first request */
      if (!dd->localcoloring) {
        ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        /* largest color value + 1 = nc*col*col */
        ncolors = nc + nc*(col-1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  /* caller receives its own reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 3d coloring: same scheme as 2d extended with a col*col*(k mod col) term */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* colorings are cached on the DM; compute only on first request */
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      /* largest color value + 1 = nc*col*col*col */
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* caller receives its own reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 1d coloring: colors by (i mod col, component); when an ofill pattern was
   set with DMDASetBlockFills() fewer colors are used, exploiting the fact that
   components with no off-diagonal coupling never conflict off the diagonal */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component away from the base point: give it a color
                 beyond the first nc, indexed by its rank among coupled columns */
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      ierr =
             ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* caller receives its own reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* 5-point star stencil (s == 1) special case: (3*j+i) mod 5 is a proper
   5-coloring of the 5-point stencil, so only 5*nc colors are needed */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* colorings are cached on the DM; compute only on first request */
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            /* SetInRange() maps periodic ghost indices back into the domain */
            colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* no ISColoringReference() here: the caller DMCreateColoring_DA_2d_MPIAIJ() takes the reference */
  PetscFunctionReturn(0);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

/*@C
   MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

   Logically Collective on mat

   Input Parameters:
+  mat - the matrix
-  da - the da

   Level: intermediate

@*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* forwards to the matrix-type-specific implementation, if any; a no-op otherwise */
  ierr =
         PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatView() override installed on parallel DMDA matrices: permutes the matrix
   into the DMDA's natural (application) ordering before viewing it */
PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer)
{
  DM                da;
  PetscErrorCode    ierr;
  const char        *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart,rend,*petsc,i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* build the PETSc-ordering indices of this process's rows in natural ordering */
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
  /* the IS takes ownership of petsc (PETSC_OWN_POINTER); do not free it here */
  ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* call viewer on natural ordering */
  ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
  /* temporarily suppress the class/name header so the permuted matrix views exactly like A would */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatLoad() override installed on parallel DMDA matrices: loads the matrix in
   natural ordering, then permutes it into the DMDA's application ordering and
   replaces A's contents in place via MatHeaderReplace() */
PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM             da;
  PetscErrorCode ierr;
  Mat            Anatural,Aapp;
  AO             ao;
  PetscInt       rstart,rend,*app,i,m,n,M,N;
  IS             is;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
  ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
  ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);

  /* Map natural ordering to application ordering and create IS */
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
  /* the IS takes ownership of app (PETSC_OWN_POINTER); do not free it here */
  ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* Do permutation and replace header */
  ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
  ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Creates and preallocates the Jacobian matrix for a DMDA, dispatching on the
   matrix type's preallocation capability (AIJ/BAIJ/SBAIJ/SELL/IS or generic) */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscErrorCode ierr;
  PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat            A;
  MPI_Comm       comm;
  MatType        Atype;
  void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType        mtype;
  PetscMPIInt    size;
  DM_DA
                 *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  ierr  = MatInitializePackage();CHKERRQ(ierr);
  mtype = da->mattype;

  /*
                                                                        m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
  ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
  ierr = MatSetType(A,mtype);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  /* small local problems are kept on the CPU even for GPU-capable matrix types */
  if (dof*nx*ny*nz < da->bind_below) {ierr = MatBindToCPU(A,PETSC_TRUE);CHKERRQ(ierr);}
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  if (da->structure_only) {
    ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices. This is kind of cheating but, cause sometimes
     we think of DMDA has higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  /* identify the matrix family by which preallocation method the type provides */
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
  if (!aij) {
    ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
  }
  if (!aij) {
    ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
    if (!baij) {
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
    }
    if (!baij) {
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
      if (!sbaij) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
      }
      if (!sbaij) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
        if (!sell) {
          ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
        }
      }
      if (!sell) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;  /* NOTE(review): shadows the function-scope size; harmless, the outer one is only set later */
        ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
        ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  } else if (baij) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
    } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
    } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
    } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
  } else {
    /* type with no recognized preallocation: just set up and attach the l2g map */
    ISLocalToGlobalMapping ltog;

    ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
    ierr = MatSetUp(A);CHKERRQ(ierr);
    ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
    ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
  }
  ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
  ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
    ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

/* Sets up local-to-global mappings and preallocation for a MATIS matrix on a DMDA */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ;
  ISLocalToGlobalMapping ltog;
  IS                     is_loc_filt, is_glob;
  const PetscInt         *e_loc,*idx;
  PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
  PetscBool              flg;
  PetscErrorCode         ierr;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter the local indices that are represented through the DMDAGetElements decomposition
     This is because the size of the local matrices in MATIS is the local size of the l2g map */
  PetscFunctionBegin;
  dof = da->w;
  dim = dm->dim;

  ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);

  /* get local elements indices in local DMDA numbering */
  ierr =
DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 846 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);CHKERRQ(ierr); 847 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 848 849 /* obtain a consistent local ordering for MATIS */ 850 ierr = ISSortRemoveDups(is_loc_filt);CHKERRQ(ierr); 851 ierr = ISBlockGetLocalSize(is_loc_filt,&nb);CHKERRQ(ierr); 852 ierr = DMGetLocalToGlobalMapping(dm,<og);CHKERRQ(ierr); 853 ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr); 854 ierr = PetscMalloc1(PetscMax(nb,nv/dof),&gidx);CHKERRQ(ierr); 855 ierr = ISBlockGetIndices(is_loc_filt,&idx);CHKERRQ(ierr); 856 ierr = ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);CHKERRQ(ierr); 857 ierr = ISBlockRestoreIndices(is_loc_filt,&idx);CHKERRQ(ierr); 858 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);CHKERRQ(ierr); 859 ierr = ISLocalToGlobalMappingCreateIS(is_glob,<og);CHKERRQ(ierr); 860 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 861 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 862 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 863 864 /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */ 865 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 866 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 867 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 868 ierr = ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);CHKERRQ(ierr); 869 ierr = ISGetIndices(is_glob,&idx);CHKERRQ(ierr); 870 ierr = ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);CHKERRQ(ierr); 871 ierr = ISRestoreIndices(is_glob,&idx);CHKERRQ(ierr); 872 ierr = ISDestroy(&is_glob);CHKERRQ(ierr); 873 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 874 ierr = 
ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);CHKERRQ(ierr); 875 ierr = ISLocalToGlobalMappingCreateIS(is_loc_filt,<og);CHKERRQ(ierr); 876 ierr = ISDestroy(&is_loc_filt);CHKERRQ(ierr); 877 ierr = MatSetLocalToGlobalMapping(lJ,ltog,ltog);CHKERRQ(ierr); 878 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 879 ierr = PetscFree(gidx);CHKERRQ(ierr); 880 881 /* Preallocation (not exact): we reuse the preallocation routines of the assembled version */ 882 flg = dm->prealloc_only; 883 dm->prealloc_only = PETSC_TRUE; 884 switch (dim) { 885 case 1: 886 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 887 ierr = DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 888 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 889 break; 890 case 2: 891 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 892 ierr = DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 893 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 894 break; 895 case 3: 896 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);CHKERRQ(ierr); 897 ierr = DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);CHKERRQ(ierr); 898 ierr = PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 899 break; 900 default: 901 SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim); 902 } 903 dm->prealloc_only = flg; 904 PetscFunctionReturn(0); 905 } 906 907 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 908 { 909 PetscErrorCode ierr; 910 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 911 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 912 MPI_Comm comm; 913 PetscScalar *values; 
914 DMBoundaryType bx,by; 915 ISLocalToGlobalMapping ltog; 916 DMDAStencilType st; 917 918 PetscFunctionBegin; 919 /* 920 nc - number of components per grid point 921 col - number of colors needed in one direction for single component problem 922 923 */ 924 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 925 col = 2*s + 1; 926 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 927 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 928 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 929 930 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 931 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 932 933 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 934 /* determine the matrix preallocation information */ 935 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 936 for (i=xs; i<xs+nx; i++) { 937 938 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 939 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 940 941 for (j=ys; j<ys+ny; j++) { 942 slot = i - gxs + gnx*(j - gys); 943 944 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 945 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 946 947 cnt = 0; 948 for (k=0; k<nc; k++) { 949 for (l=lstart; l<lend+1; l++) { 950 for (p=pstart; p<pend+1; p++) { 951 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 952 cols[cnt++] = k + nc*(slot + gnx*l + p); 953 } 954 } 955 } 956 rows[k] = k + nc*(slot); 957 } 958 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 959 } 960 } 961 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 962 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 963 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 964 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 965 966 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 967 968 /* 969 For each node in the grid: we get the neighbors in the local (on processor ordering 970 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 971 PETSc ordering. 972 */ 973 if (!da->prealloc_only) { 974 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 975 for (i=xs; i<xs+nx; i++) { 976 977 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 978 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 979 980 for (j=ys; j<ys+ny; j++) { 981 slot = i - gxs + gnx*(j - gys); 982 983 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 984 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 985 986 cnt = 0; 987 for (k=0; k<nc; k++) { 988 for (l=lstart; l<lend+1; l++) { 989 for (p=pstart; p<pend+1; p++) { 990 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 991 cols[cnt++] = k + nc*(slot + gnx*l + p); 992 } 993 } 994 } 995 rows[k] = k + nc*(slot); 996 } 997 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 998 } 999 } 1000 ierr = PetscFree(values);CHKERRQ(ierr); 1001 /* do not copy values to GPU since they are all zero and not yet needed there */ 1002 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1003 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1006 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1007 } 1008 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 1013 { 1014 PetscErrorCode ierr; 1015 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1016 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1017 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1018 MPI_Comm comm; 1019 PetscScalar *values; 1020 DMBoundaryType bx,by,bz; 1021 ISLocalToGlobalMapping ltog; 1022 DMDAStencilType st; 1023 1024 PetscFunctionBegin; 1025 /* 1026 nc - number of components per grid point 1027 col - number of colors needed in one direction for single component problem 1028 1029 */ 1030 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1031 col = 2*s + 1; 1032 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1033 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1034 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1035 1036 ierr = 
PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1037 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1038 1039 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1040 /* determine the matrix preallocation information */ 1041 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1042 for (i=xs; i<xs+nx; i++) { 1043 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1044 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1045 for (j=ys; j<ys+ny; j++) { 1046 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1047 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1048 for (k=zs; k<zs+nz; k++) { 1049 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1050 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1051 1052 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1053 1054 cnt = 0; 1055 for (l=0; l<nc; l++) { 1056 for (ii=istart; ii<iend+1; ii++) { 1057 for (jj=jstart; jj<jend+1; jj++) { 1058 for (kk=kstart; kk<kend+1; kk++) { 1059 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1060 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1061 } 1062 } 1063 } 1064 } 1065 rows[l] = l + nc*(slot); 1066 } 1067 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1068 } 1069 } 1070 } 1071 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1072 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1073 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1074 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1075 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1076 1077 /* 1078 For each node in the grid: we get the neighbors in the local (on processor ordering 1079 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1080 PETSc ordering. 
1081 */ 1082 if (!da->prealloc_only) { 1083 ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr); 1084 for (i=xs; i<xs+nx; i++) { 1085 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1086 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1087 for (j=ys; j<ys+ny; j++) { 1088 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1089 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1090 for (k=zs; k<zs+nz; k++) { 1091 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1092 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1093 1094 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1095 1096 cnt = 0; 1097 for (l=0; l<nc; l++) { 1098 for (ii=istart; ii<iend+1; ii++) { 1099 for (jj=jstart; jj<jend+1; jj++) { 1100 for (kk=kstart; kk<kend+1; kk++) { 1101 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1102 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1103 } 1104 } 1105 } 1106 } 1107 rows[l] = l + nc*(slot); 1108 } 1109 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1110 } 1111 } 1112 } 1113 ierr = PetscFree(values);CHKERRQ(ierr); 1114 /* do not copy values to GPU since they are all zero and not yet needed there */ 1115 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1116 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1117 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1118 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1119 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1120 } 1121 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1126 { 1127 PetscErrorCode ierr; 1128 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1129 PetscInt 
lstart,lend,pstart,pend,*dnz,*onz; 1130 MPI_Comm comm; 1131 DMBoundaryType bx,by; 1132 ISLocalToGlobalMapping ltog,mltog; 1133 DMDAStencilType st; 1134 PetscBool removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE; 1135 1136 PetscFunctionBegin; 1137 /* 1138 nc - number of components per grid point 1139 col - number of colors needed in one direction for single component problem 1140 1141 */ 1142 ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1143 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1144 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1145 } 1146 col = 2*s + 1; 1147 /* 1148 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1149 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1150 */ 1151 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1152 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1153 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1154 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1155 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1156 1157 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 1158 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1159 1160 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1161 /* determine the matrix preallocation information */ 1162 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1163 for (i=xs; i<xs+nx; i++) { 1164 1165 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1166 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1167 1168 for (j=ys; j<ys+ny; j++) { 1169 slot = i - gxs + gnx*(j - gys); 1170 1171 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1172 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1173 1174 cnt = 0; 1175 for (k=0; k<nc; k++) { 1176 for (l=lstart; l<lend+1; l++) { 1177 for (p=pstart; p<pend+1; p++) { 1178 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1179 cols[cnt++] = k + nc*(slot + gnx*l + p); 1180 } 1181 } 1182 } 1183 rows[k] = k + nc*(slot); 1184 } 1185 if (removedups) { 1186 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1187 } else { 1188 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1189 } 1190 } 1191 } 1192 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1193 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1194 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1195 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1196 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1197 if (!mltog) { 1198 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1199 } 1200 1201 /* 1202 For each node in the grid: we get the neighbors in the local (on processor ordering 1203 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1204 PETSc ordering. 1205 */ 1206 if (!da->prealloc_only) { 1207 for (i=xs; i<xs+nx; i++) { 1208 1209 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1210 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1211 1212 for (j=ys; j<ys+ny; j++) { 1213 slot = i - gxs + gnx*(j - gys); 1214 1215 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1216 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1217 1218 cnt = 0; 1219 for (l=lstart; l<lend+1; l++) { 1220 for (p=pstart; p<pend+1; p++) { 1221 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1222 cols[cnt++] = nc*(slot + gnx*l + p); 1223 for (k=1; k<nc; k++) { 1224 cols[cnt] = 1 + cols[cnt-1];cnt++; 1225 } 1226 } 1227 } 1228 } 1229 for (k=0; k<nc; k++) rows[k] = k + nc*(slot); 1230 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1231 } 1232 } 1233 /* do not copy values to GPU since they are all zero and not yet needed there */ 1234 ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr); 1235 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1236 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1237 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1238 if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);} 1239 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1240 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1241 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1242 } 1243 } 1244 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1249 { 1250 PetscErrorCode ierr; 1251 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1252 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1253 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1254 DM_DA *dd = (DM_DA*)da->data; 1255 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1256 MPI_Comm comm; 1257 DMBoundaryType bx,by; 1258 ISLocalToGlobalMapping ltog; 1259 DMDAStencilType st; 1260 PetscBool removedups = PETSC_FALSE; 1261 1262 PetscFunctionBegin; 1263 /* 1264 nc - number of components per grid point 1265 col - number of colors needed in one direction for single component problem 1266 1267 */ 1268 ierr = 
DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1269 col = 2*s + 1; 1270 /* 1271 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1272 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1273 */ 1274 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1275 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1276 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1277 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1279 1280 ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr); 1281 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1282 1283 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1284 /* determine the matrix preallocation information */ 1285 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 1286 for (i=xs; i<xs+nx; i++) { 1287 1288 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1289 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1290 1291 for (j=ys; j<ys+ny; j++) { 1292 slot = i - gxs + gnx*(j - gys); 1293 1294 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1295 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1296 1297 for (k=0; k<nc; k++) { 1298 cnt = 0; 1299 for (l=lstart; l<lend+1; l++) { 1300 for (p=pstart; p<pend+1; p++) { 1301 if (l || p) { 1302 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1303 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1304 } 1305 } else { 1306 if (dfill) { 1307 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1308 } else { 1309 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1310 } 1311 } 1312 } 1313 } 1314 row = k + nc*(slot); 1315 maxcnt = PetscMax(maxcnt,cnt); 1316 if (removedups) { 1317 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1318 } else { 1319 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1320 } 1321 } 1322 } 1323 } 1324 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1325 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1326 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1327 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1328 1329 /* 1330 For each node in the grid: we get the neighbors in the local (on processor ordering 1331 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1332 PETSc ordering. 1333 */ 1334 if (!da->prealloc_only) { 1335 for (i=xs; i<xs+nx; i++) { 1336 1337 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1338 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1339 1340 for (j=ys; j<ys+ny; j++) { 1341 slot = i - gxs + gnx*(j - gys); 1342 1343 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1344 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1345 1346 for (k=0; k<nc; k++) { 1347 cnt = 0; 1348 for (l=lstart; l<lend+1; l++) { 1349 for (p=pstart; p<pend+1; p++) { 1350 if (l || p) { 1351 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1352 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1353 } 1354 } else { 1355 if (dfill) { 1356 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1357 } else { 1358 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1359 } 1360 } 1361 } 1362 } 1363 row = k + nc*(slot); 1364 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1365 } 1366 } 1367 } 1368 /* do not copy values to GPU since they are all zero and not yet needed there */ 1369 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1370 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1371 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1372 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1373 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscFree(cols);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 /* ---------------------------------------------------------------------------------*/ 1380 1381 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1382 { 1383 PetscErrorCode ierr; 1384 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1385 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1386 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1387 MPI_Comm comm; 1388 DMBoundaryType bx,by,bz; 1389 ISLocalToGlobalMapping ltog,mltog; 1390 DMDAStencilType st; 1391 PetscBool removedups = PETSC_FALSE; 1392 1393 PetscFunctionBegin; 1394 /* 1395 nc - number of components per grid point 1396 col - number of colors needed in 
one direction for single component problem 1397 1398 */ 1399 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1400 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1401 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1402 } 1403 col = 2*s + 1; 1404 1405 /* 1406 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1407 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1408 */ 1409 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1410 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1411 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1412 1413 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1414 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1415 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1416 1417 ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1418 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1419 1420 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1421 /* determine the matrix preallocation information */ 1422 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1423 for (i=xs; i<xs+nx; i++) { 1424 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1425 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1426 for (j=ys; j<ys+ny; j++) { 1427 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1428 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1429 for (k=zs; k<zs+nz; k++) { 1430 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1431 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 1432 1433 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1434 1435 cnt = 0; 1436 for (l=0; l<nc; l++) { 1437 for (ii=istart; ii<iend+1; ii++) { 1438 for (jj=jstart; jj<jend+1; jj++) { 1439 for (kk=kstart; kk<kend+1; kk++) { 1440 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1441 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1442 } 1443 } 1444 } 1445 } 1446 rows[l] = l + nc*(slot); 1447 } 1448 if (removedups) { 1449 ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1450 } else { 1451 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1452 } 1453 } 1454 } 1455 } 1456 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1457 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1458 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1459 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1460 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1461 if (!mltog) { 1462 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1463 } 1464 1465 /* 1466 For each node in the grid: we get the neighbors in the local (on processor ordering 1467 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1468 PETSc ordering. 1469 */ 1470 if (!da->prealloc_only) { 1471 for (i=xs; i<xs+nx; i++) { 1472 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1473 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1474 for (j=ys; j<ys+ny; j++) { 1475 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1476 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1477 for (k=zs; k<zs+nz; k++) { 1478 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1479 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 1480 1481 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1482 1483 cnt = 0; 1484 for (kk=kstart; kk<kend+1; kk++) { 1485 for (jj=jstart; jj<jend+1; jj++) { 1486 for (ii=istart; ii<iend+1; ii++) { 1487 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1488 cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk); 1489 for (l=1; l<nc; l++) { 1490 cols[cnt] = 1 + cols[cnt-1];cnt++; 1491 } 1492 } 1493 } 1494 } 1495 } 1496 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1497 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1498 } 1499 } 1500 } 1501 /* do not copy values to GPU since they are all zero and not yet needed there */ 1502 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1503 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1505 if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1506 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1507 } 1508 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1509 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1510 } 1511 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 /* ---------------------------------------------------------------------------------*/ 1516 1517 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1518 { 1519 PetscErrorCode ierr; 1520 DM_DA *dd = (DM_DA*)da->data; 1521 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1522 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1523 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1524 DMBoundaryType bx; 1525 ISLocalToGlobalMapping ltog; 1526 PetscMPIInt rank,size; 1527 1528 PetscFunctionBegin; 1529 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr); 1530 ierr = 
MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr); 1531 1532 /* 1533 nc - number of components per grid point 1534 1535 */ 1536 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1537 if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1538 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1539 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1540 1541 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1542 ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr); 1543 1544 /* 1545 note should be smaller for first and last process with no periodic 1546 does not handle dfill 1547 */ 1548 cnt = 0; 1549 /* coupling with process to the left */ 1550 for (i=0; i<s; i++) { 1551 for (j=0; j<nc; j++) { 1552 ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1553 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1554 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1555 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1556 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1557 } 1558 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1559 cnt++; 1560 } 1561 } 1562 for (i=s; i<nx-s; i++) { 1563 for (j=0; j<nc; j++) { 1564 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1565 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1566 cnt++; 1567 } 1568 } 1569 /* coupling with process to the right */ 1570 for (i=nx-s; i<nx; i++) { 1571 for (j=0; j<nc; j++) { 1572 ocols[cnt] = ((rank == (size-1)) ? 
0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1573 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1574 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1575 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1576 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1577 } 1578 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1579 cnt++; 1580 } 1581 } 1582 1583 ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr); 1584 ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr); 1585 ierr = PetscFree2(cols,ocols);CHKERRQ(ierr); 1586 1587 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1588 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1589 1590 /* 1591 For each node in the grid: we get the neighbors in the local (on processor ordering 1592 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1593 PETSc ordering. 1594 */ 1595 if (!da->prealloc_only) { 1596 ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr); 1597 row = xs*nc; 1598 /* coupling with process to the left */ 1599 for (i=xs; i<xs+s; i++) { 1600 for (j=0; j<nc; j++) { 1601 cnt = 0; 1602 if (rank) { 1603 for (l=0; l<s; l++) { 1604 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1605 } 1606 } 1607 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1608 for (l=0; l<s; l++) { 1609 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1610 } 1611 } 1612 if (dfill) { 1613 for (k=dfill[j]; k<dfill[j+1]; k++) { 1614 cols[cnt++] = i*nc + dfill[k]; 1615 } 1616 } else { 1617 for (k=0; k<nc; k++) { 1618 cols[cnt++] = i*nc + k; 1619 } 1620 } 1621 for (l=0; l<s; l++) { 1622 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1623 } 1624 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1625 row++; 1626 } 1627 } 1628 for (i=xs+s; i<xs+nx-s; i++) { 1629 for (j=0; j<nc; j++) { 1630 cnt = 
0; 1631 for (l=0; l<s; l++) { 1632 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1633 } 1634 if (dfill) { 1635 for (k=dfill[j]; k<dfill[j+1]; k++) { 1636 cols[cnt++] = i*nc + dfill[k]; 1637 } 1638 } else { 1639 for (k=0; k<nc; k++) { 1640 cols[cnt++] = i*nc + k; 1641 } 1642 } 1643 for (l=0; l<s; l++) { 1644 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1645 } 1646 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1647 row++; 1648 } 1649 } 1650 /* coupling with process to the right */ 1651 for (i=xs+nx-s; i<xs+nx; i++) { 1652 for (j=0; j<nc; j++) { 1653 cnt = 0; 1654 for (l=0; l<s; l++) { 1655 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1656 } 1657 if (dfill) { 1658 for (k=dfill[j]; k<dfill[j+1]; k++) { 1659 cols[cnt++] = i*nc + dfill[k]; 1660 } 1661 } else { 1662 for (k=0; k<nc; k++) { 1663 cols[cnt++] = i*nc + k; 1664 } 1665 } 1666 if (rank < size-1) { 1667 for (l=0; l<s; l++) { 1668 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1669 } 1670 } 1671 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1672 for (l=0; l<s; l++) { 1673 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1674 } 1675 } 1676 ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1677 row++; 1678 } 1679 } 1680 ierr = PetscFree(cols);CHKERRQ(ierr); 1681 /* do not copy values to GPU since they are all zero and not yet needed there */ 1682 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1683 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1686 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1687 } 1688 PetscFunctionReturn(0); 1689 } 1690 1691 /* ---------------------------------------------------------------------------------*/ 
1692 1693 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS) 1694 { 1695 PetscErrorCode ierr; 1696 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1697 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1698 PetscInt istart,iend; 1699 DMBoundaryType bx; 1700 ISLocalToGlobalMapping ltog,mltog; 1701 1702 PetscFunctionBegin; 1703 /* 1704 nc - number of components per grid point 1705 col - number of colors needed in one direction for single component problem 1706 1707 */ 1708 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1709 if (!isIS && bx == DM_BOUNDARY_NONE) { 1710 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr); 1711 } 1712 col = 2*s + 1; 1713 1714 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1715 ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1716 1717 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1718 ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr); 1719 ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr); 1720 1721 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1722 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1723 if (!mltog) { 1724 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1725 } 1726 1727 /* 1728 For each node in the grid: we get the neighbors in the local (on processor ordering 1729 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1730 PETSc ordering. 
1731 */ 1732 if (!da->prealloc_only) { 1733 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1734 for (i=xs; i<xs+nx; i++) { 1735 istart = PetscMax(-s,gxs - i); 1736 iend = PetscMin(s,gxs + gnx - i - 1); 1737 slot = i - gxs; 1738 1739 cnt = 0; 1740 for (i1=istart; i1<iend+1; i1++) { 1741 cols[cnt++] = nc*(slot + i1); 1742 for (l=1; l<nc; l++) { 1743 cols[cnt] = 1 + cols[cnt-1];cnt++; 1744 } 1745 } 1746 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1747 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1748 } 1749 /* do not copy values to GPU since they are all zero and not yet needed there */ 1750 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1751 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1752 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1753 if (!isIS && bx == DM_BOUNDARY_NONE) { 1754 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1755 } 1756 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1757 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1758 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 /* ---------------------------------------------------------------------------------*/ 1764 1765 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS) 1766 { 1767 PetscErrorCode ierr; 1768 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1769 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1770 PetscInt istart,iend; 1771 DMBoundaryType bx; 1772 ISLocalToGlobalMapping ltog,mltog; 1773 1774 PetscFunctionBegin; 1775 /* 1776 nc - number of components per grid point 1777 col - number of colors needed in one direction for single component problem 1778 */ 1779 ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr); 1780 col = 2*s + 1; 1781 1782 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr); 1783 
ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr); 1784 1785 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1786 ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr); 1787 1788 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1789 ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr); 1790 if (!mltog) { 1791 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1792 } 1793 1794 /* 1795 For each node in the grid: we get the neighbors in the local (on processor ordering 1796 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1797 PETSc ordering. 1798 */ 1799 if (!da->prealloc_only) { 1800 ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr); 1801 for (i=xs; i<xs+nx; i++) { 1802 istart = PetscMax(-s,gxs - i); 1803 iend = PetscMin(s,gxs + gnx - i - 1); 1804 slot = i - gxs; 1805 1806 cnt = 0; 1807 for (i1=istart; i1<iend+1; i1++) { 1808 cols[cnt++] = nc*(slot + i1); 1809 for (l=1; l<nc; l++) { 1810 cols[cnt] = 1 + cols[cnt-1];cnt++; 1811 } 1812 } 1813 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1814 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr); 1815 } 1816 /* do not copy values to GPU since they are all zero and not yet needed there */ 1817 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1818 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820 if (!isIS && bx == DM_BOUNDARY_NONE) { 1821 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1822 } 1823 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1824 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1825 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1826 } 1827 ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr); 1828 PetscFunctionReturn(0); 1829 } 1830 1831 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1832 { 1833 
PetscErrorCode ierr; 1834 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1835 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1836 PetscInt istart,iend,jstart,jend,ii,jj; 1837 MPI_Comm comm; 1838 PetscScalar *values; 1839 DMBoundaryType bx,by; 1840 DMDAStencilType st; 1841 ISLocalToGlobalMapping ltog; 1842 1843 PetscFunctionBegin; 1844 /* 1845 nc - number of components per grid point 1846 col - number of colors needed in one direction for single component problem 1847 */ 1848 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 1849 col = 2*s + 1; 1850 1851 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 1852 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 1853 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1854 1855 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 1856 1857 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1858 1859 /* determine the matrix preallocation information */ 1860 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 1861 for (i=xs; i<xs+nx; i++) { 1862 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1863 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1864 for (j=ys; j<ys+ny; j++) { 1865 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1866 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1867 slot = i - gxs + gnx*(j - gys); 1868 1869 /* Find block columns in block row */ 1870 cnt = 0; 1871 for (ii=istart; ii<iend+1; ii++) { 1872 for (jj=jstart; jj<jend+1; jj++) { 1873 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1874 cols[cnt++] = slot + ii + gnx*jj; 1875 } 1876 } 1877 } 1878 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1879 } 1880 } 1881 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1882 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1883 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1884 1885 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1886 1887 /* 1888 For each node in the grid: we get the neighbors in the local (on processor ordering 1889 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1890 PETSc ordering. 1891 */ 1892 if (!da->prealloc_only) { 1893 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 1894 for (i=xs; i<xs+nx; i++) { 1895 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1896 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1897 for (j=ys; j<ys+ny; j++) { 1898 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1899 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1900 slot = i - gxs + gnx*(j - gys); 1901 cnt = 0; 1902 for (ii=istart; ii<iend+1; ii++) { 1903 for (jj=jstart; jj<jend+1; jj++) { 1904 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1905 cols[cnt++] = slot + ii + gnx*jj; 1906 } 1907 } 1908 } 1909 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 1910 } 1911 } 1912 ierr = PetscFree(values);CHKERRQ(ierr); 1913 /* do not copy values to GPU since they are all zero and not yet needed there */ 1914 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 1915 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1916 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1917 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 1918 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1919 } 1920 ierr = PetscFree(cols);CHKERRQ(ierr); 1921 PetscFunctionReturn(0); 1922 } 1923 1924 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1925 { 1926 PetscErrorCode ierr; 1927 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1928 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1929 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1930 MPI_Comm comm; 1931 PetscScalar *values; 1932 DMBoundaryType bx,by,bz; 1933 DMDAStencilType st; 1934 ISLocalToGlobalMapping ltog; 1935 1936 PetscFunctionBegin; 1937 /* 1938 nc - number of components per grid point 1939 col - number of colors needed in one direction for single component problem 1940 1941 */ 1942 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1943 col = 2*s + 1; 1944 1945 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1946 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1947 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1948 1949 ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 1950 1951 ierr = 
DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1952 1953 /* determine the matrix preallocation information */ 1954 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1955 for (i=xs; i<xs+nx; i++) { 1956 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1957 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1958 for (j=ys; j<ys+ny; j++) { 1959 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1960 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1961 for (k=zs; k<zs+nz; k++) { 1962 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1963 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1964 1965 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1966 1967 /* Find block columns in block row */ 1968 cnt = 0; 1969 for (ii=istart; ii<iend+1; ii++) { 1970 for (jj=jstart; jj<jend+1; jj++) { 1971 for (kk=kstart; kk<kend+1; kk++) { 1972 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1973 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1974 } 1975 } 1976 } 1977 } 1978 ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1979 } 1980 } 1981 } 1982 ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 1983 ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 1984 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1985 1986 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1987 1988 /* 1989 For each node in the grid: we get the neighbors in the local (on processor ordering 1990 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1991 PETSc ordering. 1992 */ 1993 if (!da->prealloc_only) { 1994 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 1995 for (i=xs; i<xs+nx; i++) { 1996 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1997 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,m-i-1)); 1998 for (j=ys; j<ys+ny; j++) { 1999 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2000 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2001 for (k=zs; k<zs+nz; k++) { 2002 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2003 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2004 2005 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2006 2007 cnt = 0; 2008 for (ii=istart; ii<iend+1; ii++) { 2009 for (jj=jstart; jj<jend+1; jj++) { 2010 for (kk=kstart; kk<kend+1; kk++) { 2011 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2012 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2013 } 2014 } 2015 } 2016 } 2017 ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2018 } 2019 } 2020 } 2021 ierr = PetscFree(values);CHKERRQ(ierr); 2022 /* do not copy values to GPU since they are all zero and not yet needed there */ 2023 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2024 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2025 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2026 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2027 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2028 } 2029 ierr = PetscFree(cols);CHKERRQ(ierr); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 /* 2034 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2035 identify in the local ordering with periodic domain. 
2036 */ 2037 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2038 { 2039 PetscErrorCode ierr; 2040 PetscInt i,n; 2041 2042 PetscFunctionBegin; 2043 ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr); 2044 ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr); 2045 for (i=0,n=0; i<*cnt; i++) { 2046 if (col[i] >= *row) col[n++] = col[i]; 2047 } 2048 *cnt = n; 2049 PetscFunctionReturn(0); 2050 } 2051 2052 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2053 { 2054 PetscErrorCode ierr; 2055 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2056 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2057 PetscInt istart,iend,jstart,jend,ii,jj; 2058 MPI_Comm comm; 2059 PetscScalar *values; 2060 DMBoundaryType bx,by; 2061 DMDAStencilType st; 2062 ISLocalToGlobalMapping ltog; 2063 2064 PetscFunctionBegin; 2065 /* 2066 nc - number of components per grid point 2067 col - number of colors needed in one direction for single component problem 2068 */ 2069 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 2070 col = 2*s + 1; 2071 2072 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 2073 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 2074 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2075 2076 ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr); 2077 2078 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2079 2080 /* determine the matrix preallocation information */ 2081 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr); 2082 for (i=xs; i<xs+nx; i++) { 2083 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2084 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2085 for (j=ys; j<ys+ny; j++) { 2086 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2087 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2088 slot = i - gxs + gnx*(j - gys); 2089 2090 /* Find block columns in block row */ 2091 cnt = 0; 2092 for (ii=istart; ii<iend+1; ii++) { 2093 for (jj=jstart; jj<jend+1; jj++) { 2094 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2095 cols[cnt++] = slot + ii + gnx*jj; 2096 } 2097 } 2098 } 2099 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2100 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2101 } 2102 } 2103 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2104 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2105 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2106 2107 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2108 2109 /* 2110 For each node in the grid: we get the neighbors in the local (on processor ordering 2111 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2112 PETSc ordering. 2113 */ 2114 if (!da->prealloc_only) { 2115 ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr); 2116 for (i=xs; i<xs+nx; i++) { 2117 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2118 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2119 for (j=ys; j<ys+ny; j++) { 2120 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2121 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2122 slot = i - gxs + gnx*(j - gys); 2123 2124 /* Find block columns in block row */ 2125 cnt = 0; 2126 for (ii=istart; ii<iend+1; ii++) { 2127 for (jj=jstart; jj<jend+1; jj++) { 2128 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2129 cols[cnt++] = slot + ii + gnx*jj; 2130 } 2131 } 2132 } 2133 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2134 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2135 } 2136 } 2137 ierr = PetscFree(values);CHKERRQ(ierr); 2138 /* do not copy values to GPU since they are all zero and not yet needed there */ 2139 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2140 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2141 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2143 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2144 } 2145 ierr = PetscFree(cols);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2150 { 2151 PetscErrorCode ierr; 2152 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2153 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2154 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2155 MPI_Comm comm; 2156 PetscScalar *values; 2157 DMBoundaryType bx,by,bz; 2158 DMDAStencilType st; 2159 ISLocalToGlobalMapping ltog; 2160 2161 PetscFunctionBegin; 2162 /* 2163 nc - number of components per grid point 2164 col - number of colors needed in one direction for single component problem 2165 */ 2166 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2167 col = 2*s + 1; 2168 2169 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2170 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2171 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2172 2173 /* create the matrix */ 2174 
ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr); 2175 2176 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2177 2178 /* determine the matrix preallocation information */ 2179 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2180 for (i=xs; i<xs+nx; i++) { 2181 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2182 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2183 for (j=ys; j<ys+ny; j++) { 2184 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2185 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2186 for (k=zs; k<zs+nz; k++) { 2187 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2188 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2189 2190 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2191 2192 /* Find block columns in block row */ 2193 cnt = 0; 2194 for (ii=istart; ii<iend+1; ii++) { 2195 for (jj=jstart; jj<jend+1; jj++) { 2196 for (kk=kstart; kk<kend+1; kk++) { 2197 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2198 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2199 } 2200 } 2201 } 2202 } 2203 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2204 ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr); 2205 } 2206 } 2207 } 2208 ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr); 2209 ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr); 2210 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2211 2212 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2213 2214 /* 2215 For each node in the grid: we get the neighbors in the local (on processor ordering 2216 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2217 PETSc ordering. 
2218 */ 2219 if (!da->prealloc_only) { 2220 ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr); 2221 for (i=xs; i<xs+nx; i++) { 2222 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2223 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2224 for (j=ys; j<ys+ny; j++) { 2225 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2226 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2227 for (k=zs; k<zs+nz; k++) { 2228 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2229 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2230 2231 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2232 2233 cnt = 0; 2234 for (ii=istart; ii<iend+1; ii++) { 2235 for (jj=jstart; jj<jend+1; jj++) { 2236 for (kk=kstart; kk<kend+1; kk++) { 2237 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2238 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2239 } 2240 } 2241 } 2242 } 2243 ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr); 2244 ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2245 } 2246 } 2247 } 2248 ierr = PetscFree(values);CHKERRQ(ierr); 2249 /* do not copy values to GPU since they are all zero and not yet needed there */ 2250 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2251 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2252 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2253 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2254 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2255 } 2256 ierr = PetscFree(cols);CHKERRQ(ierr); 2257 PetscFunctionReturn(0); 2258 } 2259 2260 /* ---------------------------------------------------------------------------------*/ 2261 2262 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2263 { 2264 PetscErrorCode ierr; 2265 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2266 PetscInt 
m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2267 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2268 DM_DA *dd = (DM_DA*)da->data; 2269 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2270 MPI_Comm comm; 2271 PetscScalar *values; 2272 DMBoundaryType bx,by,bz; 2273 ISLocalToGlobalMapping ltog; 2274 DMDAStencilType st; 2275 PetscBool removedups = PETSC_FALSE; 2276 2277 PetscFunctionBegin; 2278 /* 2279 nc - number of components per grid point 2280 col - number of colors needed in one direction for single component problem 2281 2282 */ 2283 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 2284 col = 2*s + 1; 2285 if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2286 by 2*stencil_width + 1\n"); 2287 if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2288 by 2*stencil_width + 1\n"); 2289 if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2290 by 2*stencil_width + 1\n"); 2291 2292 /* 2293 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2294 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 
2295 */ 2296 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2297 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2298 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2299 2300 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 2301 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 2302 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 2303 2304 ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr); 2305 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 2306 2307 /* determine the matrix preallocation information */ 2308 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 2309 2310 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 2311 for (i=xs; i<xs+nx; i++) { 2312 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2313 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2314 for (j=ys; j<ys+ny; j++) { 2315 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2316 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2317 for (k=zs; k<zs+nz; k++) { 2318 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2319 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 2320 2321 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2322 2323 for (l=0; l<nc; l++) { 2324 cnt = 0; 2325 for (ii=istart; ii<iend+1; ii++) { 2326 for (jj=jstart; jj<jend+1; jj++) { 2327 for (kk=kstart; kk<kend+1; kk++) { 2328 if (ii || jj || kk) { 2329 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2330 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2331 } 2332 } else { 2333 if (dfill) { 2334 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2335 } else { 2336 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2337 } 2338 } 2339 } 2340 } 2341 } 2342 row = l + nc*(slot); 2343 maxcnt = PetscMax(maxcnt,cnt); 2344 if (removedups) { 2345 ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2346 } else { 2347 ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 2348 } 2349 } 2350 } 2351 } 2352 } 2353 ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr); 2354 ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 2355 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2356 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 2357 2358 /* 2359 For each node in the grid: we get the neighbors in the local (on processor ordering 2360 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2361 PETSc ordering. 2362 */ 2363 if (!da->prealloc_only) { 2364 ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr); 2365 for (i=xs; i<xs+nx; i++) { 2366 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2367 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2368 for (j=ys; j<ys+ny; j++) { 2369 jstart = (by == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s,-j)); 2370 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2371 for (k=zs; k<zs+nz; k++) { 2372 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2373 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2374 2375 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2376 2377 for (l=0; l<nc; l++) { 2378 cnt = 0; 2379 for (ii=istart; ii<iend+1; ii++) { 2380 for (jj=jstart; jj<jend+1; jj++) { 2381 for (kk=kstart; kk<kend+1; kk++) { 2382 if (ii || jj || kk) { 2383 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2384 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2385 } 2386 } else { 2387 if (dfill) { 2388 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2389 } else { 2390 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2391 } 2392 } 2393 } 2394 } 2395 } 2396 row = l + nc*(slot); 2397 ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 2398 } 2399 } 2400 } 2401 } 2402 ierr = PetscFree(values);CHKERRQ(ierr); 2403 /* do not copy values to GPU since they are all zero and not yet needed there */ 2404 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 2405 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2406 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2407 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 2408 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2409 } 2410 ierr = PetscFree(cols);CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413