1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 18 { 19 PetscErrorCode ierr; 20 PetscInt i,j,nz,*fill; 21 22 PetscFunctionBegin; 23 if (!dfill) PetscFunctionReturn(0); 24 25 /* count number nonzeros */ 26 nz = 0; 27 for (i=0; i<w; i++) { 28 for (j=0; j<w; j++) { 29 if (dfill[w*i+j]) nz++; 30 } 31 } 32 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 33 /* construct modified CSR storage of nonzero structure */ 34 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 35 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 36 nz = w + 1; 37 for (i=0; i<w; i++) { 38 fill[i] = nz; 39 for (j=0; j<w; j++) { 40 if (dfill[w*i+j]) { 41 fill[nz] = j; 42 nz++; 43 } 44 } 45 } 46 fill[w] = nz; 47 48 *rfill = fill; 49 PetscFunctionReturn(0); 50 } 51 52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 53 { 54 PetscErrorCode ierr; 55 PetscInt nz; 56 57 PetscFunctionBegin; 58 if (!dfillsparse) PetscFunctionReturn(0); 59 60 /* Determine number of non-zeros */ 61 nz = (dfillsparse[w] - w - 1); 62 63 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 64 ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr); 65 ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 70 { 71 PetscErrorCode ierr; 72 PetscInt i,k,cnt = 1; 73 74 PetscFunctionBegin; 75 76 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 77 columns to the left with any nonzeros in them plus 1 */ 78 ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr); 79 for (i=0; i<dd->w; i++) { 80 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 81 } 82 for (i=0; i<dd->w; i++) { 83 if (dd->ofillcols[i]) { 84 dd->ofillcols[i] = cnt++; 85 } 86 } 87 PetscFunctionReturn(0); 88 } 89 90 /*@ 91 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 92 of the matrix returned by DMCreateMatrix(). 93 94 Logically Collective on da 95 96 Input Parameters: 97 + da - the distributed array 98 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 99 - ofill - the fill pattern in the off-diagonal blocks 100 101 Level: developer 102 103 Notes: 104 This only makes sense when you are doing multicomponent problems but using the 105 MPIAIJ matrix format 106 107 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 108 representing coupling and 0 entries for missing coupling. For example 109 $ dfill[9] = {1, 0, 0, 110 $ 1, 1, 0, 111 $ 0, 1, 1} 112 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 113 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 114 diagonal block). 
    DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
    can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (converted to modified-CSR form) */
  ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

       The format for dfill and ofill is a sparse representation of a
       dof-by-dof matrix with 1 entries representing coupling and 0 entries
       for missing coupling. The sparse representation is a 1 dimensional
       array of length nz + dof + 1, where nz is the number of non-zeros in
       the matrix. The first dof entries in the array give the
       starting array indices of each row's items in the rest of the array,
       the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
       and the remaining nz items give the column indices of each of
       the 1s within the logical 2D matrix. Each row's items within
       the array are the column indices of the 1s within that row
       of the 2D matrix. PETSc developers may recognize that this is the
       same format as that computed by the DMDASetBlockFills_Private()
       function from a dense 2D matrix representation.

    DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
    can be represented in the dfill, ofill format

   Contributed by Philip C. Roth

.seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save (deep-copy) the given sparse dfill and ofill information */
  ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Entry point for DMCreateColoring() on a DMDA: validates the request and
   dispatches to the dimension-specific routine below; for BAIJ matrices the
   coloring is computed per block, not per matrix element */
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode ierr;
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                                      m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------               |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .
         |          (gxs,gys)                                  |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);

  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction that is one process wide would place both ends of the
         domain (hence aliased ghost points) on the same process */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
  if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (isBAIJ) {
    /* temporarily rescale the dof and x-direction index ranges to block units;
       restored below after the dimension-specific routine runs */
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) {
    ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else if (dim == 2) {
    ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else if (dim == 3) {
    ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* undo the block-unit rescaling performed above */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 2d coloring: color = component + nc*((i mod col) + col*(j mod col)), where
   col = 2*s+1 is the number of colors needed per direction for stencil width s.
   The coloring is cached on the DM (localcoloring/ghostedcoloring). */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++)
            {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        ncolors = nc + nc*(col-1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 3d coloring: same scheme as the 2d routine extended with a col*col*(k mod col)
   term for the third direction; results cached on the DM */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 1d coloring; when a sparse off-diagonal fill (ofillcols) was supplied only the
   coupled components get distinct off-stencil colors */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring
*coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* 5-point star-stencil (s == 1) special case: five colors per component suffice,
   using the (3*j + i) mod 5 pattern */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            /* SetInRange handles periodic ghost indices outside 0..m-1 / 0..n-1 */
            colors[ii++] = k
 + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscFunctionReturn(0);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

/*@C
   MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

   Logically Collective on mat

   Input Parameters:
+  mat - the matrix
-  da - the da

   Level: intermediate

@*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* dispatch to the matrix type's implementation, if it provides one */
  ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Views a DMDA-generated parallel matrix in the natural (application) ordering
   by permuting it with the DM's AO before calling MatView() */
PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer)
{
  DM                da;
  PetscErrorCode    ierr;
  const char        *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart,rend,*petsc,i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
  ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* call viewer on natural ordering */
  ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
  /* suppress the class/name/prefix banner so the permuted matrix views like the original */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
621 } 622 623 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 624 { 625 DM da; 626 PetscErrorCode ierr; 627 Mat Anatural,Aapp; 628 AO ao; 629 PetscInt rstart,rend,*app,i,m,n,M,N; 630 IS is; 631 MPI_Comm comm; 632 633 PetscFunctionBegin; 634 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 635 ierr = MatGetDM(A, &da);CHKERRQ(ierr); 636 PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 637 638 /* Load the matrix in natural ordering */ 639 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr); 640 ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr); 641 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 642 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 643 ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr); 644 ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr); 645 646 /* Map natural ordering to application ordering and create IS */ 647 ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr); 648 ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr); 649 ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr); 650 for (i=rstart; i<rend; i++) app[i-rstart] = i; 651 ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr); 652 ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 653 654 /* Do permutation and replace header */ 655 ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr); 656 ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr); 657 ierr = ISDestroy(&is);CHKERRQ(ierr); 658 ierr = MatDestroy(&Anatural);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 663 { 664 PetscErrorCode ierr; 665 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 666 Mat A; 667 MPI_Comm comm; 668 MatType Atype; 669 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 670 MatType mtype; 671 PetscMPIInt size; 672 DM_DA *dd = (DM_DA*)da->data; 
  PetscFunctionBegin;
  ierr  = MatInitializePackage();CHKERRQ(ierr);
  mtype = da->mattype;

  /*
                                                      m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------               |
         |               |                    |                |
      n  |           ny  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     nx                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
  ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
  ierr = MatSetType(A,mtype);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  if (dof*nx*ny*nz < da->bind_below) {
    /* local problem is small; keep the matrix on the CPU */
    ierr = MatSetBindingPropagates(A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(A,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  if (da->structure_only) {
    ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
  /*
     We do not provide a getmatrix function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being more
   more low-level than matrices. This is kind of cheating but, cause sometimes
   we think of DMDA has higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
  if (!aij) {
    ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
  }
  if (!aij) {
    ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
    if (!baij) {
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
    }
    if (!baij) {
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
      if (!sbaij) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
      }
      if (!sbaij) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
        if (!sell) {
          ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
        }
      }
      if (!sell) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
        ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A);CHKERRQ(ierr);
        } else {
          ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
      }
    }
  } else if (baij) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
  } else {
    /* matrix type provides no known preallocation hook; just set it up */
    ISLocalToGlobalMapping ltog;

    ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
    ierr = MatSetUp(A);CHKERRQ(ierr);
    ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
    ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
  }
  ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
  ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
    ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

/* Creates/preallocates a MATIS matrix for a DMDA using the element
   decomposition from DMDAGetElements() and a MATPREALLOCATOR helper matrix */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ,P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt         *e_loc,*idx;
  PetscInt               i,nel,nen,nv,dof,*gidx,n,N;
  PetscErrorCode         ierr;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof  = da->w;
  ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);

  /* flag local elements indices in local DMDA numbering */
  ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
  ierr =
PetscBTCreate(nv/dof,&bt);CHKERRQ(ierr); 849 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 850 for (i=0;i<nel*nen;i++) { ierr = PetscBTSet(bt,e_loc[i]);CHKERRQ(ierr); } 851 852 /* filter out (set to -1) the global indices not used by the local elements */ 853 ierr = PetscMalloc1(nv/dof,&gidx);CHKERRQ(ierr); 854 ierr = ISLocalToGlobalMappingGetBlockIndices(ltog,&idx);CHKERRQ(ierr); 855 ierr = PetscArraycpy(gidx,idx,nv/dof);CHKERRQ(ierr); 856 ierr = ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx);CHKERRQ(ierr); 857 for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1; 858 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 859 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 860 ierr = ISLocalToGlobalMappingCreateIS(is,<og);CHKERRQ(ierr); 861 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 862 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 863 ierr = ISDestroy(&is);CHKERRQ(ierr); 864 865 /* Preallocation */ 866 if (dm->prealloc_skip) { 867 ierr = MatSetUp(J);CHKERRQ(ierr); 868 } else { 869 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 870 ierr = MatGetLocalToGlobalMapping(lJ,<og,NULL);CHKERRQ(ierr); 871 ierr = MatCreate(PetscObjectComm((PetscObject)lJ),&P);CHKERRQ(ierr); 872 ierr = MatSetType(P,MATPREALLOCATOR);CHKERRQ(ierr); 873 ierr = MatSetLocalToGlobalMapping(P,ltog,ltog);CHKERRQ(ierr); 874 ierr = MatGetSize(lJ,&N,NULL);CHKERRQ(ierr); 875 ierr = MatGetLocalSize(lJ,&n,NULL);CHKERRQ(ierr); 876 ierr = MatSetSizes(P,n,n,N,N);CHKERRQ(ierr); 877 ierr = MatSetBlockSize(P,dof);CHKERRQ(ierr); 878 ierr = MatSetUp(P);CHKERRQ(ierr); 879 for (i=0;i<nel;i++) { 880 ierr = MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES);CHKERRQ(ierr); 881 } 882 ierr = MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ);CHKERRQ(ierr); 883 ierr = 
/*
   DMCreateMatrix_DA_2d_MPISELL - Preallocates and (optionally) zero-assembles a
   SELL-format matrix for a 2d DMDA with either box or star stencil.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* rows holds the nc row indices of one grid point; cols holds at most a full box stencil of columns */
  ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil at the physical boundary unless the domain is periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        /* insert explicit zeros to lock in the nonzero pattern */
        ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
s : (PetscMin(s,n-j-1)); 971 972 cnt = 0; 973 for (k=0; k<nc; k++) { 974 for (l=lstart; l<lend+1; l++) { 975 for (p=pstart; p<pend+1; p++) { 976 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 977 cols[cnt++] = k + nc*(slot + gnx*l + p); 978 } 979 } 980 } 981 rows[k] = k + nc*(slot); 982 } 983 ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr); 984 } 985 } 986 ierr = PetscFree(values);CHKERRQ(ierr); 987 /* do not copy values to GPU since they are all zero and not yet needed there */ 988 ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr); 989 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 990 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr); 992 ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 993 } 994 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 995 PetscFunctionReturn(0); 996 } 997 998 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 999 { 1000 PetscErrorCode ierr; 1001 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1002 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1003 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1004 MPI_Comm comm; 1005 PetscScalar *values; 1006 DMBoundaryType bx,by,bz; 1007 ISLocalToGlobalMapping ltog; 1008 DMDAStencilType st; 1009 1010 PetscFunctionBegin; 1011 /* 1012 nc - number of components per grid point 1013 col - number of colors needed in one direction for single component problem 1014 1015 */ 1016 ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr); 1017 col = 2*s + 1; 1018 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr); 1019 ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr); 1020 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 1021 1022 ierr = 
PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr); 1023 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1024 1025 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1026 /* determine the matrix preallocation information */ 1027 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr); 1028 for (i=xs; i<xs+nx; i++) { 1029 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1030 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1031 for (j=ys; j<ys+ny; j++) { 1032 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1033 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1034 for (k=zs; k<zs+nz; k++) { 1035 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1036 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1037 1038 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1039 1040 cnt = 0; 1041 for (l=0; l<nc; l++) { 1042 for (ii=istart; ii<iend+1; ii++) { 1043 for (jj=jstart; jj<jend+1; jj++) { 1044 for (kk=kstart; kk<kend+1; kk++) { 1045 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1046 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1047 } 1048 } 1049 } 1050 } 1051 rows[l] = l + nc*(slot); 1052 } 1053 ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr); 1054 } 1055 } 1056 } 1057 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 1058 ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr); 1059 ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr); 1060 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 1061 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 1062 1063 /* 1064 For each node in the grid: we get the neighbors in the local (on processor ordering 1065 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1066 PETSc ordering. 
/*
   DMCreateMatrix_DA_2d_MPIAIJ - Preallocates and (optionally) zero-assembles an
   AIJ-format matrix for a 2d DMDA, dense coupling among the nc components.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
    /* columns are generated in sorted order below, so the fast insertion path can be used */
    ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
  }
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
     columns multiple times because of "wrapping" around the end of the domain hitting an entry
     already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      } else {
        ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* first component of the neighbor, remaining nc-1 are consecutive */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        /* NULL values: inserts zeros, locking the nonzero pattern in place */
        ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);}
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ() but uses the
   user-provided dfill/ofill block fill patterns (see DMDASetBlockFills()) so only the
   coupled components get nonzeros.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
     columns multiple times because of "wrapping" around the end of the domain hitting an entry
     already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {

    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      for (k=0; k<nc; k++) {
        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if (l || p) {
              /* off-diagonal grid point: use the ofill pattern for row component k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
              }
            } else {
              /* diagonal grid point: use dfill if given, otherwise dense block */
              if (dfill) {
                for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
              } else {
                for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
              }
            }
          }
        }
        row    = k + nc*(slot);
        maxcnt = PetscMax(maxcnt,cnt);
        if (removedups) {
          ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        } else {
          ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        for (k=0; k<nc; k++) {
          cnt = 0;
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
                } else {
                  for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
                }
              }
            }
          }
          row  = k + nc*(slot);
          ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
/*
   DMCreateMatrix_DA_3d_MPIAIJ - Preallocates and (optionally) zero-assembles an
   AIJ-format matrix for a 3d DMDA, dense coupling among the nc components.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    /* columns are generated in sorted order below, so the fast insertion path can be used */
    ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
  }
  col = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
     columns multiple times because of "wrapping" around the end of the domain hitting an entry
     already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        } else {
          ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          /* note: loop nest order (kk,jj,ii) generates the columns in increasing order */
          cnt = 0;
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  /* first component of the neighbor, remaining nc-1 are consecutive */
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  for (l=1; l<nc; l++) {
                    cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
/*
   DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates and (optionally) zero-assembles an
   AIJ-format matrix for a 1d DMDA using the dfill/ofill block fill patterns.

   Notes:
   Only stencil width s == 1 is supported (checked below); the preallocation counts
   assume s == 1 semantics for the boundary/periodic corrections.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);

  /*
         nc - number of components per grid point
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  PetscCheckFalse(s > 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* cols/ocols here hold per-row diagonal/off-diagonal nonzero COUNTS, not column indices */
  ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the left neighbor lives on the last process (or locally if serial) */
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil on both sides */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
  ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is reused, now as a buffer of GLOBAL column indices for MatSetValues() */
    ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
    row  = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the far end of the domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    /* interior rows */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap past the right end of the domain; correct for s == 1 (checked above) */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
/*
   DMCreateMatrix_DA_1d_MPIAIJ - Builds the AIJ matrix for a 1d DMDA, preallocating
   with the dense per-row estimate col*nc = (2*s+1)*nc for both the diagonal and
   off-diagonal parts, then (unless da->prealloc_only) inserting explicit zeros
   (NULL values) so the nonzero pattern is locked in before use.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  /* NOTE(review): without periodicity the columns below are generated in increasing
     order, which presumably is why MAT_SORTED_FULL is only enabled in that case */
  if (bx == DM_BOUNDARY_NONE) {
    ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
  }
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the ghosted local domain */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        /* nc consecutive scalar columns per neighboring grid point */
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (bx == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Same construction as
   DMCreateMatrix_DA_1d_MPIAIJ above, but uses MatSeqAIJSetTotalPreallocation(),
   which takes a single total nonzero count instead of per-row counts (SeqAIJ only).
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* one total count: nx*nc rows with at most col*nc nonzeros each */
  ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (bx == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  }
  ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIBAIJ - Builds the BAIJ (blocked) matrix for a 2d DMDA;
   block size is nc, so only stencil geometry (not per-component fill) matters.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode
ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);
        cnt    = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPIBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* cols holds block-column indices, so no nc*nc factor is needed here */
  ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);

  ierr =
DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
    identify in the local ordering with periodic domain.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
{
  PetscErrorCode ierr;
  PetscInt       i,n;

  PetscFunctionBegin;
  /* map the block row and the block columns to global block indices in place,
     then compact col[] keeping only entries on or above the diagonal (col >= row) */
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
  for (i=0,n=0; i<*cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPISBAIJ - Builds the SBAIJ (symmetric blocked) matrix for a
   2d DMDA; only the upper-triangular part of each block row is preallocated/inserted,
   using L2GFilterUpperTriangular() to drop lower-triangular entries.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s,n-j-1));
      slot = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
      ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        /* slot/cols are global block indices after the filter, hence the non-Local insert */
        ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
        ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPISBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the matrix */
  ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
        ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
          ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_3d_MPIAIJ_Fill - Builds the AIJ matrix for a 3d DMDA using the
   per-component fill patterns dd->dfill/dd->ofill (see DMDASetBlockFills()).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt
m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  PetscCheckFalse(bx == DM_BOUNDARY_PERIODIC && (m % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheckFalse(by == DM_BOUNDARY_PERIODIC && (n % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheckFalse(bz == DM_BOUNDARY_PERIODIC && (p % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* one scalar row per component l: off-diagonal stencil points couple via
           ofill, the diagonal point via dfill (dense coupling when dfill is NULL) */
        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row    = l + nc*(slot);
          maxcnt = PetscMax(maxcnt,cnt);
          if (removedups) {
            ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
          } else {
            ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* maxcnt from the counting pass bounds the widest row inserted below */
    ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row  = l + nc*(slot);
            ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}