1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 18 { 19 PetscErrorCode ierr; 20 PetscInt i,j,nz,*fill; 21 22 PetscFunctionBegin; 23 if (!dfill) PetscFunctionReturn(0); 24 25 /* count number nonzeros */ 26 nz = 0; 27 for (i=0; i<w; i++) { 28 for (j=0; j<w; j++) { 29 if (dfill[w*i+j]) nz++; 30 } 31 } 32 ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr); 33 /* construct modified CSR storage of nonzero structure */ 34 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 35 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 36 nz = w + 1; 37 for (i=0; i<w; i++) { 38 fill[i] = nz; 39 for (j=0; j<w; j++) { 40 if (dfill[w*i+j]) { 41 fill[nz] = j; 42 nz++; 43 } 44 } 45 } 46 fill[w] = nz; 47 48 *rfill = fill; 49 PetscFunctionReturn(0); 50 } 51 52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 53 { 54 PetscErrorCode ierr; 55 PetscInt nz; 56 57 PetscFunctionBegin; 58 if (!dfillsparse) PetscFunctionReturn(0); 59 60 /* Determine number of non-zeros */ 61 nz = (dfillsparse[w] - w - 1); 62 63 /* Allocate space for our copy of the given sparse matrix representation. 
*/
  ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
  ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMDASetBlockFills_Private2 - Builds dd->ofillcols from dd->ofill.

   On return, ofillcols[c] is 0 when column c of the off-diagonal fill pattern
   has no nonzeros; otherwise it is the 1-based rank of c among the columns
   that do have nonzeros.
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscErrorCode ierr;
  PetscInt       i,k,cnt = 1;

  PetscFunctionBegin;

  /* ofillcount tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
  /* first pass: mark every column that appears as a column index in ofill */
  for (i=0; i<dd->w; i++) {
    for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  /* second pass: replace the marks by a running 1-based count */
  for (i=0; i<dd->w; i++) {
    if (dd->ofillcols[i]) {
      dd->ofillcols[i] = cnt++;
    }
  }
  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix().

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the fill pattern in the off-diagonal blocks

    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

    The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
    representing coupling and 0 entries for missing coupling. For example
$             dfill[9] = {1, 0, 0,
$                         1, 1, 0,
$                         0, 1, 1}
    means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
    itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
    diagonal block).

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save (a CSR copy of) the given dfill and ofill information */
  ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

    The format for dfill and ofill is a sparse representation of a
    dof-by-dof matrix with 1 entries representing coupling and 0 entries
    for missing coupling. The sparse representation is a 1 dimensional
    array of length nz + dof + 1, where nz is the number of non-zeros in
    the matrix. The first dof entries in the array give the
    starting array indices of each row's items in the rest of the array,
    the dof+1st item contains the value nz + dof + 1 (i.e.
    the entire length of the array)
    and the remaining nz items give the column indices of each of
    the 1s within the logical 2D matrix. Each row's items within
    the array are the column indices of the 1s within that row
    of the 2D matrix. PETSc developers may recognize that this is the
    same format as that computed by the DMDASetBlockFills_Private()
    function from a dense 2D matrix representation.

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Philip C. Roth

.seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA          *dd = (DM_DA*)da->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* save a copy of the given (already sparse) dfill and ofill information */
  ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
  ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);

  /* count nonzeros in ofill columns */
  ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
   DMCreateColoring_DA - Computes a coloring suitable for finite-difference
   Jacobian evaluation on a DMDA, dispatching to the dimension-specific
   helpers below.
*/
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode ierr;
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .
                                                 |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);

  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (ctype == IS_COLORING_LOCAL) {
    /* on one process a local coloring degenerates to a global one */
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
  if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
  if (isBAIJ) {
    /* temporarily rescale the DMDA description to one dof per point; restored after the dispatch below */
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) {
    ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else if (dim == 2) {
    ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else if (dim == 3) {
    ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* restore the per-point dof description that was rescaled above */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateColoring_DA_2d_MPIAIJ - 2d coloring: point (i,j), component k receives
   color k + nc*((i % col) + col*(j % col)) with col = 2*s+1, so any two points
   within the stencil width of each other get different colors. Colorings are
   cached on the DM (dd->localcoloring / dd->ghostedcoloring).
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        ncolors = nc + nc*(col-1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
        ii   = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  /* caller receives a new reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateColoring_DA_3d_MPIAIJ - 3d analogue of the 2d coloring above.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr =
DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* caller receives a new reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateColoring_DA_1d_MPIAIJ - 1d coloring; when a block fill pattern was set
   with DMDASetBlockFills() the ofillcols information is used to reduce the
   number of colors needed.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring
*coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components that couple to off-diagonal blocks */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* caller receives a new reference to the cached coloring */
  ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateColoring_DA_2d_5pt_MPIAIJ - special 5-color scheme for the 2d five-point
   stencil (star stencil, width 1); called only from DMCreateColoring_DA_2d_MPIAIJ(),
   which takes the reference on the returned coloring.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscErrorCode  ierr;
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
      ii   = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k
+ nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
      ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  /* no ISColoringReference() here: the only caller, DMCreateColoring_DA_2d_MPIAIJ(), takes the reference */
  PetscFunctionReturn(0);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

/*@C
   MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

   Logically Collective on mat

   Input Parameters:
+  mat - the matrix
-  da - the da

   Level: intermediate

@*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* dispatch to the matrix type's MatSetupDM_C method, if it has one */
  ierr =
PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatView_MPI_DA - MatView() implementation installed on parallel DMDA-generated
   matrices: permutes the matrix into the DMDA's natural ordering before viewing.
*/
PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer)
{
  DM                da;
  PetscErrorCode    ierr;
  const char        *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart,rend,*petsc,i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  PetscCheckFalse(!da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* map this process's application-ordering row range to PETSc ordering */
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
  ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* call viewer on natural ordering */
  ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
  /* suppress the header so the viewed matrix looks like A itself, not a temporary */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatLoad_MPI_DA - MatLoad() implementation installed on parallel DMDA-generated
   matrices: loads the matrix in natural ordering, then permutes it into the
   DMDA's application ordering, replacing A's contents in place.
*/
PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM             da;
  PetscErrorCode ierr;
  Mat            Anatural,Aapp;
  AO             ao;
  PetscInt       rstart,rend,*app,i,m,n,M,N;
  IS             is;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetDM(A, &da);CHKERRQ(ierr);
  PetscCheckFalse(!da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
  ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
  ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);

  /* Map natural ordering to application ordering and create IS */
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
  ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);

  /* Do permutation and replace header */
  ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
  ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA - Creates and preallocates a matrix for a DMDA, dispatching
   on the dimension and on which preallocation routines the chosen matrix type
   supports.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscErrorCode ierr;
  PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat            A;
  MPI_Comm       comm;
  MatType        Atype;
  void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType        mtype;
  PetscMPIInt    size;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  ierr  = MatInitializePackage();CHKERRQ(ierr);
  mtype = da->mattype;

  /*
                                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           ny  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     nx                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
  ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
  ierr = MatSetType(A,mtype);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  /* small local problems are kept on the CPU */
  if (dof*nx*ny*nz < da->bind_below) {
    ierr = MatSetBindingPropagates(A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(A,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  if (da->structure_only) {
    ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices. This is kind of cheating but, cause sometimes
     we think of DMDA as higher level than matrices.
     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  /* probe which preallocation interface the matrix type supports */
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
  if (!aij) {
    ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
  }
  if (!aij) {
    ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
    if (!baij) {
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
    }
    if (!baij) {
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
      if (!sbaij) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
      }
      if (!sbaij) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
        if (!sell) {
          ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
        }
      }
      if (!sell) {
        ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        DMBoundaryType bx;
        PetscMPIInt  size;
        ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
        ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A);CHKERRQ(ierr);
        } else {
          ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
      } else {
        ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
      }
    }
  } else if (baij) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
    if (dim == 2) {
      ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
    } else if (dim == 3) {
      ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
  } else {
    /* no recognized preallocation interface; set up without preallocation */
    ISLocalToGlobalMapping ltog;

    ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
    ierr = MatSetUp(A);CHKERRQ(ierr);
    ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
    ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
  }
  ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
  ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
  ierr = MatSetDM(A,da);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
    ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

/*
   DMCreateMatrix_DA_IS - Preallocation for MATIS matrices on a DMDA, driven by
   the element decomposition from DMDAGetElements().
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ,P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt         *e_loc,*idx;
  PetscInt               i,nel,nen,nv,dof,*gidx,n,N;
  PetscErrorCode         ierr;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof  = da->w;
  ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);

  /* flag local elements indices in local DMDA numbering */
  ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
  ierr =
PetscBTCreate(nv/dof,&bt);CHKERRQ(ierr); 849 ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 850 for (i=0;i<nel*nen;i++) { ierr = PetscBTSet(bt,e_loc[i]);CHKERRQ(ierr); } 851 852 /* filter out (set to -1) the global indices not used by the local elements */ 853 ierr = PetscMalloc1(nv/dof,&gidx);CHKERRQ(ierr); 854 ierr = ISLocalToGlobalMappingGetBlockIndices(ltog,&idx);CHKERRQ(ierr); 855 ierr = PetscArraycpy(gidx,idx,nv/dof);CHKERRQ(ierr); 856 ierr = ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx);CHKERRQ(ierr); 857 for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1; 858 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 859 ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is);CHKERRQ(ierr); 860 ierr = ISLocalToGlobalMappingCreateIS(is,<og);CHKERRQ(ierr); 861 ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr); 862 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 863 ierr = ISDestroy(&is);CHKERRQ(ierr); 864 865 /* Preallocation */ 866 ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr); 867 ierr = MatGetLocalToGlobalMapping(lJ,<og,NULL);CHKERRQ(ierr); 868 ierr = MatCreate(PetscObjectComm((PetscObject)lJ),&P);CHKERRQ(ierr); 869 ierr = MatSetType(P,MATPREALLOCATOR);CHKERRQ(ierr); 870 ierr = MatSetLocalToGlobalMapping(P,ltog,ltog);CHKERRQ(ierr); 871 ierr = MatGetSize(lJ,&N,NULL);CHKERRQ(ierr); 872 ierr = MatGetLocalSize(lJ,&n,NULL);CHKERRQ(ierr); 873 ierr = MatSetSizes(P,n,n,N,N);CHKERRQ(ierr); 874 ierr = MatSetBlockSize(P,dof);CHKERRQ(ierr); 875 ierr = MatSetUp(P);CHKERRQ(ierr); 876 for (i=0;i<nel;i++) { 877 ierr = MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES);CHKERRQ(ierr); 878 } 879 ierr = MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ);CHKERRQ(ierr); 880 ierr = MatISRestoreLocalMat(J,&lJ);CHKERRQ(ierr); 881 ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); 882 ierr 
= MatDestroy(&P);CHKERRQ(ierr); 883 884 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 885 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 890 { 891 PetscErrorCode ierr; 892 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 893 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 894 MPI_Comm comm; 895 PetscScalar *values; 896 DMBoundaryType bx,by; 897 ISLocalToGlobalMapping ltog; 898 DMDAStencilType st; 899 900 PetscFunctionBegin; 901 /* 902 nc - number of components per grid point 903 col - number of colors needed in one direction for single component problem 904 905 */ 906 ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr); 907 col = 2*s + 1; 908 ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr); 909 ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr); 910 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 911 912 ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr); 913 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 914 915 ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr); 916 /* determine the matrix preallocation information */ 917 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr); 918 for (i=xs; i<xs+nx; i++) { 919 920 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 921 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 922 923 for (j=ys; j<ys+ny; j++) { 924 slot = i - gxs + gnx*(j - gys); 925 926 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 927 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is locked in before first use */
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPISELL - 3d analogue of DMCreateMatrix_DA_2d_MPISELL: dense
   component coupling at each stencil point of a 3d DMDA, SELL storage.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col  = 2*s + 1;
  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr =
PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent per direction: full width for periodic, clipped at the physical boundary otherwise */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block large enough for the full nc x cnt insert below (cnt <= col^3*nc) */
    ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (l=0; l<nc; l++) {
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
            rows[l] = l + nc*(slot);
          }
          ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and optionally zero-fills) an AIJ matrix
   for a 2d DMDA with dense coupling between all components at each stencil point.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode ierr;
  PetscInt       xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt       lstart,lend,pstart,pend,*dnz,*onz;
MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
    ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
  }
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
       columns multiple times because of "wrapping" around the end of the domain hitting an entry
       already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      } else {
        ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);}
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors the
   per-block fill patterns (dd->dfill / dd->ofill) set with DMDASetBlockFills().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr =
DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
       columns multiple times because of "wrapping" around the end of the domain hitting an entry
       already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {

    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      for (k=0; k<nc; k++) {
        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            /* off-diagonal stencil point: use the ofill pattern; center point: use dfill (or dense) */
            if (l || p) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
              }
            } else {
              if (dfill) {
                for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
              } else {
                for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
              }
            }
          }
        }
        row    = k + nc*(slot);
        maxcnt = PetscMax(maxcnt,cnt);
        if (removedups) {
          ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        } else {
          ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        for (k=0; k<nc; k++) {
          cnt = 0;
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
                } else {
                  for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
                }
              }
            }
          }
          row  = k + nc*(slot);
          ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and optionally zero-fills) an AIJ matrix
   for a 3d DMDA with dense coupling between all components at each stencil point.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for
single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
  }
  col = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
       columns multiple times because of "wrapping" around the end of the domain hitting an entry
       already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        } else {
          ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          /* note: kk/jj/ii loop order here differs from the preallocation loop above,
             producing column indices in increasing order for the insertion pass */
          cnt = 0;
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  for (l=1; l<nc; l++) {
                    cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates an AIJ matrix for a 1d DMDA honoring
   the block fill patterns (dd->dfill / dd->ofill); only implemented for stencil width 1.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
  ierr =
MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);

  /*
         nc - number of components per grid point
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  PetscCheckFalse(s > 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* cols/ocols here hold per-row diagonal/off-diagonal nonzero COUNTS, not column indices */
  ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the wrapped neighbors are off-process only when more than one rank exists */
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior rows */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
  ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is reused below as a GLOBAL column-index buffer (inserts use MatSetValues, not MatSetValuesLocal) */
    ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
    row  = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    /* interior rows */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
        row++;
      }
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Builds the (MPI)AIJ matrix for a 1d DMDA with no
  user-supplied block-fill information: every component at a grid point is treated
  as coupled to every component at each grid point within the stencil width.

  The matrix is preallocated with a uniform per-row estimate (col*nc entries for
  both the diagonal and off-diagonal parts) and, unless da->prealloc_only is set,
  the full nonzero structure is inserted with NULL (i.e. zero) values so that later
  MatSetValues() calls cannot hit a new-nonzero allocation.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  if (bx == DM_BOUNDARY_NONE) {
    /* columns are generated in ascending order below; presumably this enables the
       sorted full-row insertion fast path — see MatSetOption() docs to confirm */
    ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
  }
  col = 2*s + 1;   /* number of grid points in the 1d stencil */

  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* uniform estimate: full stencil of col points, nc components each, for both parts */
  ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  /* only install the DMDA's mapping if the matrix does not already carry one */
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil offsets to the ghosted region [gxs, gxs+gnx) */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;   /* local (ghosted) index of grid point i */

      /* columns: all nc components of every stencil neighbor, in ascending order */
      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      /* rows: the nc consecutive dofs of grid point i */
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (bx == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    /* the structure is now complete; any later insertion at a new location is a bug */
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Same nonzero structure as
  DMCreateMatrix_DA_1d_MPIAIJ() above, but uses MatSeqAIJSetTotalPreallocation()
  with a single aggregate count (nx*nc*col*nc) instead of per-row preallocation.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  /* one total count for the whole matrix: nx*nc rows, at most col*nc entries each */
  ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
  ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
  if (!mltog) {
    ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil offsets to the ghosted region [gxs, gxs+gnx) */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (bx == DM_BOUNDARY_NONE) {
      ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
  }
  /* NOTE(review): set unconditionally here (also conditionally above); presumably
     clears a mode enabled by MatSeqAIJSetTotalPreallocation() — confirm */
  ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPIBAIJ - Preallocates and zero-fills the (MPI)BAIJ matrix
  for a 2d DMDA; one nc x nc block per stencil neighbor (box, or star arms only).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode
ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* scratch array of block-column indices for one block row */
  ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* periodic: full stencil always; otherwise clip to the physical domain [0,m) x [0,n) */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);   /* local (ghosted) block row */

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert zero blocks into the same pattern to lock the structure */
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);
        cnt    = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPIBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ():
  preallocates and zero-fills the (MPI)BAIJ matrix for a 3d DMDA, one nc x nc
  block per stencil neighbor (full box, or the three star arms).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* scratch array: at most col^3 block columns per block row */
  ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert one zeroed nc x nc block per stencil neighbor */
    ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
    identify in the local ordering with periodic domain.
*/
/*
  L2GFilterUpperTriangular - Converts a block row index and its block-column list
  from local to global numbering (via ISLocalToGlobalMappingApplyBlock(), in place)
  and then compacts the column list, keeping only columns >= the row (the upper
  triangle). On return *row and col[] hold global indices and *cnt is reduced.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
{
  PetscErrorCode ierr;
  PetscInt       i,n;

  PetscFunctionBegin;
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
  for (i=0,n=0; i<*cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPISBAIJ - Preallocates and zero-fills the symmetric (MPI)SBAIJ
  matrix for a 2d DMDA. Column lists are filtered to the upper triangle (in global
  numbering) by L2GFilterUpperTriangular(), so insertion uses global indices
  (MatSetValuesBlocked()) rather than the local variants used by the BAIJ routines.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      /* slot and cols become GLOBAL block indices, restricted to the upper triangle */
      ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
      ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert zero blocks (global indices after the upper-triangular filter) */
    ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
        ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPISBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ():
  symmetric (MPI)SBAIJ preallocation and zero-fill for a 3d DMDA, upper triangle only.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col = 2*s + 1;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* create the matrix */
  ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);

  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
        ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert zero blocks (global indices after the upper-triangular filter) */
    ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
          ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Builds the (MPI)AIJ matrix for a 3d DMDA using
  the per-block fill patterns dd->dfill (diagonal block) and dd->ofill (off-diagonal
  blocks) set via DMDASetBlockFills(), so only the coupled components contribute
  nonzeros (see declarations continued below).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt
m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;   /* CSR-style fill patterns from DMDASetBlockFills() */
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
  col = 2*s + 1;
  PetscCheckFalse(bx == DM_BOUNDARY_PERIODIC && (m % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheckFalse(by == DM_BOUNDARY_PERIODIC && (n % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheckFalse(bz == DM_BOUNDARY_PERIODIC && (p % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);

  /* scratch: worst case one scalar column per component of every stencil point */
  ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
  ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);

  ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* one scalar row per component l at this grid point */
        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  /* off-diagonal (neighbor) block: use the ofill pattern */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  /* diagonal block: dfill pattern if given, otherwise dense coupling */
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row    = l + nc*(slot);
          maxcnt = PetscMax(maxcnt,cnt);   /* track widest row for the zero-values buffer below */
          if (removedups) {
            ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
          } else {
            ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert zeros in the identical pattern to lock the nonzero structure */
    ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row  = l + nc*(slot);
            ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
    }
    ierr = PetscFree(values);CHKERRQ(ierr);
    /* do not copy values to GPU since they are all zero and not yet needed there */
    ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}