1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3 #include <../src/mat/impls/baij/mpi/mpibaij.h> 4 #include <petsc/private/isimpl.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatFDColoringApply_BAIJ" 8 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,void *sctx) 9 { 10 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 11 PetscErrorCode ierr; 12 PetscInt k,cstart,cend,l,row,col,nz,spidx,i,j; 13 PetscScalar dx=0.0,*w3_array,*dy_i,*dy=coloring->dy; 14 PetscScalar *vscale_array; 15 const PetscScalar *xx; 16 PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 17 Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 18 void *fctx=coloring->fctx; 19 PetscInt ctype=coloring->ctype,nxloc,nrows_k; 20 PetscScalar *valaddr; 21 MatEntry *Jentry=coloring->matentry; 22 MatEntry2 *Jentry2=coloring->matentry2; 23 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 24 PetscInt bs=J->rmap->bs; 25 26 PetscFunctionBegin; 27 /* (1) Set w1 = F(x1) */ 28 if (!coloring->fset) { 29 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 30 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 31 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 32 } else { 33 coloring->fset = PETSC_FALSE; 34 } 35 36 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 37 ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr); 38 if (coloring->htype[0] == 'w') { 39 /* vscale = dx is a constant scalar */ 40 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 41 dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 42 } else { 43 ierr = VecGetArrayRead(x1,&xx);CHKERRQ(ierr); 44 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 45 for (col=0; col<nxloc; col++) { 46 dx = xx[col]; 47 if (PetscAbsScalar(dx) < umin) { 48 if (PetscRealPart(dx) >= 0.0) dx = umin; 49 else if (PetscRealPart(dx) < 0.0 ) dx = -umin; 50 } 51 dx *= epsilon; 52 vscale_array[col] = 1.0/dx; 53 } 54 ierr = VecRestoreArrayRead(x1,&xx);CHKERRQ(ierr); 55 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 56 } 57 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 58 ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 59 ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 60 } 61 62 /* (3) Loop over each color */ 63 if (!coloring->w3) { 64 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 65 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 66 } 67 w3 = coloring->w3; 68 69 ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */ 70 if (vscale) { 71 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 72 } 73 nz = 0; 74 for (k=0; k<ncolors; k++) { 75 coloring->currentcolor = k; 76 77 /* 78 (3-1) Loop over each column associated with color 79 adding the perturbation to the vector w3 = x1 + dx. 80 */ 81 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 82 dy_i = dy; 83 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 84 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 85 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 86 if (coloring->htype[0] == 'w') { 87 for (l=0; l<ncolumns[k]; l++) { 88 col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 89 w3_array[col] += 1.0/dx; 90 if (i) w3_array[col-1] -= 1.0/dx; /* resume original w3[col-1] */ 91 } 92 } else { /* htype == 'ds' */ 93 vscale_array -= cstart; /* shift pointer so global index can be used */ 94 for (l=0; l<ncolumns[k]; l++) { 95 col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 96 w3_array[col] += 1.0/vscale_array[col]; 97 if (i) w3_array[col-1] -= 1.0/vscale_array[col-1]; /* resume original w3[col-1] */ 98 } 99 vscale_array += cstart; 100 } 101 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 102 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 103 104 /* 105 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 106 w2 = F(x1 + dx) - F(x1) 107 */ 108 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 109 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 110 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 111 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 112 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 113 ierr = VecResetArray(w2);CHKERRQ(ierr); 114 dy_i += nxloc; /* points to dy+i*nxloc */ 115 } 116 117 /* 118 (3-3) Loop over rows of vector, putting results into Jacobian matrix 119 */ 120 nrows_k = nrows[k]; 121 if (coloring->htype[0] == 'w') { 122 for (l=0; l<nrows_k; l++) { 123 row = bs*Jentry2[nz].row; /* local row index */ 124 valaddr = Jentry2[nz++].valaddr; 125 spidx = 0; 126 dy_i = dy; 127 for (i=0; i<bs; i++) { /* column of the block */ 128 for (j=0; j<bs; j++) { /* row of the block */ 129 valaddr[spidx++] = dy_i[row+j]*dx; 130 } 131 dy_i += nxloc; /* points to dy+i*nxloc */ 132 } 133 } 134 } else { /* htype == 'ds' */ 135 for (l=0; l<nrows_k; l++) { 136 row = bs*Jentry[nz].row; /* local row index */ 137 col = bs*Jentry[nz].col; /* local column index */ 138 valaddr = Jentry[nz++].valaddr; 139 spidx = 0; 140 dy_i = dy; 141 for (i=0; i<bs; i++) { /* column of the block */ 142 for (j=0; j<bs; j++) { /* row of the block */ 143 valaddr[spidx++] = dy_i[row+j]*vscale_array[col+i]; 144 } 145 dy_i += nxloc; /* points to dy+i*nxloc */ 146 } 147 } 148 } 149 } 150 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 151 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 152 if (vscale) { 153 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 154 } 155 156 coloring->currentcolor = -1; 157 PetscFunctionReturn(0); 158 } 159 160 /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */ 161 #undef __FUNCT__ 162 #define __FUNCT__ "MatFDColoringApply_AIJ" 163 PetscErrorCode MatFDColoringApply_AIJ(Mat J,MatFDColoring coloring,Vec x1,void *sctx) 164 { 165 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 166 PetscErrorCode ierr; 167 PetscInt k,cstart,cend,l,row,col,nz; 168 PetscScalar dx=0.0,*y,*w3_array; 169 const PetscScalar *xx; 170 PetscScalar *vscale_array; 171 PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 172 Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 173 void *fctx=coloring->fctx; 174 ISColoringType ctype=coloring->ctype; 175 PetscInt nxloc,nrows_k; 176 MatEntry *Jentry=coloring->matentry; 177 MatEntry2 *Jentry2=coloring->matentry2; 178 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 179 180 PetscFunctionBegin; 181 if ((ctype == IS_COLORING_LOCAL) && (J->ops->fdcoloringapply == MatFDColoringApply_AIJ)) SETERRQ(PetscObjectComm((PetscObject)J),PETSC_ERR_SUP,"Must call MatColoringUseDM() with IS_COLORING_LOCAL"); 182 /* (1) Set w1 = F(x1) */ 183 if (!coloring->fset) { 184 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 185 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 186 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 187 } else { 188 coloring->fset = PETSC_FALSE; 189 } 190 191 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 192 if (coloring->htype[0] == 'w') { 193 /* vscale = 1./dx is a constant scalar */ 194 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 195 dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 196 } else { 197 ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr); 198 ierr = VecGetArrayRead(x1,&xx);CHKERRQ(ierr); 199 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 200 for (col=0; col<nxloc; col++) { 201 dx = xx[col]; 202 if (PetscAbsScalar(dx) < umin) { 203 if (PetscRealPart(dx) >= 0.0) dx = umin; 204 else if (PetscRealPart(dx) < 0.0 ) dx = -umin; 205 } 206 dx *= epsilon; 207 vscale_array[col] = 1.0/dx; 208 } 209 ierr = VecRestoreArrayRead(x1,&xx);CHKERRQ(ierr); 210 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 211 } 212 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 213 ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 214 ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 215 } 216 217 /* (3) Loop over each color */ 218 if (!coloring->w3) { 219 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 220 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 221 } 222 w3 = coloring->w3; 223 224 ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */ 225 if (vscale) { 226 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 227 } 228 nz = 0; 229 230 if (coloring->bcols > 1) { /* use blocked insertion of Jentry */ 231 PetscInt i,m=J->rmap->n,nbcols,bcols=coloring->bcols; 232 PetscScalar *dy=coloring->dy,*dy_k; 233 234 nbcols = 0; 235 for (k=0; k<ncolors; k+=bcols) { 236 coloring->currentcolor = k; 237 238 /* 239 (3-1) Loop over each column associated with color 240 adding the perturbation to the vector w3 = x1 + dx. 241 */ 242 243 dy_k = dy; 244 if (k + bcols > ncolors) bcols = ncolors - k; 245 for (i=0; i<bcols; i++) { 246 247 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 248 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 249 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 250 if (coloring->htype[0] == 'w') { 251 for (l=0; l<ncolumns[k+i]; l++) { 252 col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */ 253 w3_array[col] += 1.0/dx; 254 } 255 } else { /* htype == 'ds' */ 256 vscale_array -= cstart; /* shift pointer so global index can be used */ 257 for (l=0; l<ncolumns[k+i]; l++) { 258 col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */ 259 w3_array[col] += 1.0/vscale_array[col]; 260 } 261 vscale_array += cstart; 262 } 263 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 264 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 265 266 /* 267 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 268 w2 = F(x1 + dx) - F(x1) 269 */ 270 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 271 ierr = VecPlaceArray(w2,dy_k);CHKERRQ(ierr); /* place w2 to the array dy_i */ 272 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 273 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 274 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 275 ierr = VecResetArray(w2);CHKERRQ(ierr); 276 dy_k += m; /* points to dy+i*nxloc */ 277 } 278 279 /* 280 (3-3) Loop over block rows of vector, putting results into Jacobian matrix 281 */ 282 nrows_k = nrows[nbcols++]; 283 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 284 285 if (coloring->htype[0] == 'w') { 286 for (l=0; l<nrows_k; l++) { 287 row = Jentry2[nz].row; /* local row index */ 288 *(Jentry2[nz++].valaddr) = dy[row]*dx; 289 } 290 } else { /* htype == 'ds' */ 291 for (l=0; l<nrows_k; l++) { 292 row = Jentry[nz].row; /* local row index */ 293 *(Jentry[nz].valaddr) = dy[row]*vscale_array[Jentry[nz].col]; 294 nz++; 295 } 296 } 297 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 298 } 299 } else { /* bcols == 1 */ 300 for (k=0; k<ncolors; k++) { 301 coloring->currentcolor = k; 302 303 /* 304 (3-1) Loop over each column associated with color 305 adding the perturbation to the vector w3 = x1 + dx. 306 */ 307 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 308 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 309 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 310 if (coloring->htype[0] == 'w') { 311 for (l=0; l<ncolumns[k]; l++) { 312 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 313 w3_array[col] += 1.0/dx; 314 } 315 } else { /* htype == 'ds' */ 316 vscale_array -= cstart; /* shift pointer so global index can be used */ 317 for (l=0; l<ncolumns[k]; l++) { 318 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 319 w3_array[col] += 1.0/vscale_array[col]; 320 } 321 vscale_array += cstart; 322 } 323 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 324 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 325 326 /* 327 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 328 w2 = F(x1 + dx) - F(x1) 329 */ 330 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 331 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 332 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 333 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 334 335 /* 336 (3-3) Loop over rows of vector, putting results into Jacobian matrix 337 */ 338 nrows_k = nrows[k]; 339 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 340 if (coloring->htype[0] == 'w') { 341 for (l=0; l<nrows_k; l++) { 342 row = Jentry2[nz].row; /* local row index */ 343 *(Jentry2[nz++].valaddr) = y[row]*dx; 344 } 345 } else { /* htype == 'ds' */ 346 for (l=0; l<nrows_k; l++) { 347 row = Jentry[nz].row; /* local row index */ 348 *(Jentry[nz].valaddr) = y[row]*vscale_array[Jentry[nz].col]; 349 nz++; 350 } 351 } 352 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 353 } 354 } 355 356 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 357 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 if (vscale) { 359 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 360 } 361 coloring->currentcolor = -1; 362 PetscFunctionReturn(0); 363 } 364 365 #undef __FUNCT__ 366 #define __FUNCT__ "MatFDColoringSetUp_MPIXAIJ" 367 PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 368 { 369 PetscErrorCode ierr; 370 PetscMPIInt size,*ncolsonproc,*disp,nn; 371 PetscInt i,n,nrows,nrows_i,j,k,m,ncols,col,*rowhit,cstart,cend,colb; 372 const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*row=NULL,*ltog=NULL; 373 PetscInt nis=iscoloring->n,nctot,*cols; 374 IS *isa; 375 ISLocalToGlobalMapping map=mat->cmap->mapping; 376 PetscInt ctype=c->ctype,*spidxA,*spidxB,nz,bs,bs2,spidx; 377 Mat A,B; 378 PetscScalar *A_val,*B_val,**valaddrhit; 379 MatEntry *Jentry; 380 MatEntry2 *Jentry2; 381 PetscBool isBAIJ; 382 PetscInt bcols=c->bcols; 383 #if defined(PETSC_USE_CTABLE) 384 PetscTable colmap=NULL; 385 #else 386 PetscInt *colmap=NULL; /* local col number of off-diag col */ 387 #endif 388 389 PetscFunctionBegin; 390 if (ctype == IS_COLORING_LOCAL) { 391 if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 392 ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr); 393 } 394 395 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 396 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr); 397 if (isBAIJ) { 398 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 399 Mat_SeqBAIJ *spA,*spB; 400 A = baij->A; spA = (Mat_SeqBAIJ*)A->data; A_val = spA->a; 401 B = baij->B; spB = (Mat_SeqBAIJ*)B->data; B_val = spB->a; 402 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 403 if (!baij->colmap) { 404 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 405 } 406 colmap = baij->colmap; 407 ierr = MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 408 ierr = MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 409 410 if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 411 PetscInt *garray; 412 ierr = PetscMalloc1(B->cmap->n,&garray);CHKERRQ(ierr); 413 for (i=0; i<baij->B->cmap->n/bs; i++) { 414 for (j=0; j<bs; j++) { 415 garray[i*bs+j] = bs*baij->garray[i]+j; 416 } 417 } 418 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 419 ierr = PetscFree(garray);CHKERRQ(ierr); 420 } 421 } else { 422 Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 423 Mat_SeqAIJ *spA,*spB; 424 A = aij->A; spA = (Mat_SeqAIJ*)A->data; A_val = spA->a; 425 B = aij->B; spB = (Mat_SeqAIJ*)B->data; B_val = spB->a; 426 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 427 if (!aij->colmap) { 428 /* Allow access to data structures of local part of matrix 429 - creates aij->colmap which maps global column number to local number in part B */ 430 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 431 } 432 colmap = aij->colmap; 433 ierr = MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 434 ierr = MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 435 436 bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ 437 438 if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 439 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr); 440 } 441 } 442 443 m = mat->rmap->n/bs; 444 cstart = mat->cmap->rstart/bs; 445 cend = mat->cmap->rend/bs; 446 447 ierr = PetscMalloc1(nis,&c->ncolumns);CHKERRQ(ierr); 448 ierr = PetscMalloc1(nis,&c->columns);CHKERRQ(ierr); 449 ierr = PetscCalloc1(nis,&c->nrows);CHKERRQ(ierr); 450 ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); 451 452 if (c->htype[0] == 'd') { 453 ierr = PetscMalloc1(nz,&Jentry);CHKERRQ(ierr); 454 ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr); 455 c->matentry = Jentry; 456 } else if (c->htype[0] == 'w') { 457 ierr = PetscMalloc1(nz,&Jentry2);CHKERRQ(ierr); 458 ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));CHKERRQ(ierr); 459 c->matentry2 = Jentry2; 460 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported"); 461 462 ierr = PetscMalloc2(m+1,&rowhit,m+1,&valaddrhit);CHKERRQ(ierr); 463 nz = 0; 464 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 465 for (i=0; i<nis; i++) { /* for each local color */ 466 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 467 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 468 469 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 470 if (n) { 471 ierr = PetscMalloc1(n,&c->columns[i]);CHKERRQ(ierr); 472 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 473 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 474 } else { 475 c->columns[i] = 0; 476 } 477 478 if (ctype == IS_COLORING_GLOBAL) { 479 /* Determine nctot, the total (parallel) number of columns of this color */ 480 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 481 ierr = PetscMalloc2(size,&ncolsonproc,size,&disp);CHKERRQ(ierr); 482 483 /* ncolsonproc[j]: local ncolumns on proc[j] of this color */ 484 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 485 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 486 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 487 if (!nctot) { 488 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 489 } 490 491 disp[0] = 0; 492 for (j=1; j<size; j++) { 493 disp[j] = disp[j-1] + ncolsonproc[j-1]; 494 } 495 496 /* Get cols, the complete list of columns for this color on each process */ 497 ierr = PetscMalloc1(nctot+1,&cols);CHKERRQ(ierr); 498 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 499 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 500 } else if (ctype == IS_COLORING_LOCAL) { 501 /* Determine local number of columns of this color on this process, including ghost points */ 502 nctot = n; 503 ierr = PetscMalloc1(nctot+1,&cols);CHKERRQ(ierr); 504 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 505 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 506 507 /* Mark all rows affect by these columns */ 508 ierr = PetscMemzero(rowhit,m*sizeof(PetscInt));CHKERRQ(ierr); 509 bs2 = bs*bs; 510 nrows_i = 0; 511 for (j=0; j<nctot; j++) { /* loop over columns*/ 512 if (ctype == IS_COLORING_LOCAL) { 513 col = ltog[cols[j]]; 514 } else { 515 col = cols[j]; 516 } 517 if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */ 518 row = A_cj + A_ci[col-cstart]; 519 nrows = A_ci[col-cstart+1] - A_ci[col-cstart]; 520 nrows_i += nrows; 521 /* loop over columns of A marking them in rowhit */ 522 for (k=0; k<nrows; k++) { 523 /* set valaddrhit for part A */ 524 spidx = bs2*spidxA[A_ci[col-cstart] + k]; 525 valaddrhit[*row] = &A_val[spidx]; 526 rowhit[*row++] = col - cstart + 1; /* local column index */ 527 } 528 } else { /* column is in B, off-diagonal block of mat */ 529 #if defined(PETSC_USE_CTABLE) 530 ierr = PetscTableFind(colmap,col+1,&colb);CHKERRQ(ierr); 531 colb--; 532 #else 533 colb = colmap[col] - 1; /* local column index */ 534 #endif 535 if (colb == -1) { 536 nrows = 0; 537 } else { 538 colb = colb/bs; 539 row = B_cj + B_ci[colb]; 540 nrows = B_ci[colb+1] - B_ci[colb]; 541 } 542 nrows_i += nrows; 543 /* loop over columns of B marking them in rowhit */ 544 for (k=0; k<nrows; k++) { 545 /* set valaddrhit for part B */ 546 spidx = bs2*spidxB[B_ci[colb] + k]; 547 valaddrhit[*row] = &B_val[spidx]; 548 rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */ 549 } 550 } 551 } 552 c->nrows[i] = nrows_i; 553 554 if (c->htype[0] == 'd') { 555 for (j=0; j<m; j++) { 556 if (rowhit[j]) { 557 Jentry[nz].row = j; /* local row index */ 558 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 559 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 560 nz++; 561 } 562 } 563 } else { /* c->htype == 'wp' */ 564 for (j=0; j<m; j++) { 565 if (rowhit[j]) { 566 Jentry2[nz].row = j; /* local row index */ 567 Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 568 nz++; 569 } 570 } 571 } 572 ierr = PetscFree(cols);CHKERRQ(ierr); 573 } 574 575 if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */ 576 ierr = MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);CHKERRQ(ierr); 577 } 578 579 if (isBAIJ) { 580 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 581 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 582 ierr = PetscMalloc1(bs*mat->rmap->n,&c->dy);CHKERRQ(ierr); 583 } else { 584 ierr = MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 585 ierr = MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 586 } 587 588 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 589 ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); 590 591 if (ctype == IS_COLORING_LOCAL) { 592 ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr); 593 } 594 ierr = PetscInfo3(c,"ncolors %D, brows %D and bcols %D are used.\n",c->ncolors,c->brows,c->bcols);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatFDColoringCreate_MPIXAIJ" 600 PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 601 { 602 PetscErrorCode ierr; 603 PetscInt bs,nis=iscoloring->n,m=mat->rmap->n; 604 PetscBool isBAIJ; 605 606 PetscFunctionBegin; 607 /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian; 608 bcols is chosen s.t. dy-array takes 50% of memory space as mat */ 609 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 610 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr); 611 if (isBAIJ || m == 0) { 612 c->brows = m; 613 c->bcols = 1; 614 } else { /* mpiaij matrix */ 615 /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */ 616 Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 617 Mat_SeqAIJ *spA,*spB; 618 Mat A,B; 619 PetscInt nz,brows,bcols; 620 PetscReal mem; 621 622 bs = 1; /* only bs=1 is supported for MPIAIJ matrix */ 623 624 A = aij->A; spA = (Mat_SeqAIJ*)A->data; 625 B = aij->B; spB = (Mat_SeqAIJ*)B->data; 626 nz = spA->nz + spB->nz; /* total local nonzero entries of mat */ 627 mem = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt); 628 bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar))); 629 brows = 1000/bcols; 630 if (bcols > nis) bcols = nis; 631 if (brows == 0 || brows > m) brows = m; 632 c->brows = brows; 633 c->bcols = bcols; 634 } 635 636 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 637 c->N = mat->cmap->N/bs; 638 c->m = mat->rmap->n/bs; 639 c->rstart = mat->rmap->rstart/bs; 640 c->ncolors = nis; 641 PetscFunctionReturn(0); 642 } 643