1 2 /* 3 This provides a matrix that consists of Mats 4 */ 5 6 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */ 8 9 typedef struct { 10 SEQAIJHEADER(Mat); 11 SEQBAIJHEADER; 12 Mat *diags; 13 14 Vec left,right,middle,workb; /* dummy vectors to perform local parts of product */ 15 } Mat_BlockMat; 16 17 extern PetscErrorCode MatBlockMatSetPreallocation(Mat,PetscInt,PetscInt,const PetscInt*); 18 19 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 20 { 21 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 22 PetscScalar *x; 23 const Mat *v; 24 const PetscScalar *b; 25 PetscErrorCode ierr; 26 PetscInt n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs; 27 const PetscInt *idx; 28 IS row,col; 29 MatFactorInfo info; 30 Vec left = a->left,right = a->right, middle = a->middle; 31 Mat *diag; 32 33 PetscFunctionBegin; 34 its = its*lits; 35 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 36 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 37 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0"); 38 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift"); 39 if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) { 40 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep"); 41 } 42 43 if (!a->diags) { 44 ierr = PetscMalloc1(mbs,&a->diags);CHKERRQ(ierr); 45 ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) { 47 ierr = MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);CHKERRQ(ierr); 48 ierr = MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info);CHKERRQ(ierr); 49 ierr = MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);CHKERRQ(ierr); 50 ierr = ISDestroy(&row);CHKERRQ(ierr); 51 ierr = ISDestroy(&col);CHKERRQ(ierr); 52 } 53 ierr = VecDuplicate(bb,&a->workb);CHKERRQ(ierr); 54 } 55 diag = a->diags; 56 57 ierr = VecSet(xx,0.0);CHKERRQ(ierr); 58 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 59 /* copy right hand side because it must be modified during iteration */ 60 ierr = VecCopy(bb,a->workb);CHKERRQ(ierr); 61 ierr = VecGetArrayRead(a->workb,&b);CHKERRQ(ierr); 62 63 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 64 while (its--) { 65 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 66 67 for (i=0; i<mbs; i++) { 68 n = a->i[i+1] - a->i[i] - 1; 69 idx = a->j + a->i[i] + 1; 70 v = a->a + a->i[i] + 1; 71 72 ierr = VecSet(left,0.0);CHKERRQ(ierr); 73 for (j=0; j<n; j++) { 74 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 75 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 76 ierr = VecResetArray(right);CHKERRQ(ierr); 77 } 78 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 79 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 80 ierr = VecResetArray(right);CHKERRQ(ierr); 81 82 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 83 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 84 85 /* now adjust right hand side, see MatSOR_SeqSBAIJ */ 86 for (j=0; j<n; j++) { 87 ierr = MatMultTranspose(v[j],right,left);CHKERRQ(ierr); 88 ierr = VecPlaceArray(middle,b + idx[j]*bs);CHKERRQ(ierr); 89 ierr = VecAXPY(middle,-1.0,left);CHKERRQ(ierr); 90 ierr = VecResetArray(middle);CHKERRQ(ierr); 91 } 92 ierr = VecResetArray(right);CHKERRQ(ierr); 93 94 } 95 } 96 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 97 98 for (i=mbs-1; i>=0; i--) { 99 n = a->i[i+1] - a->i[i] - 1; 100 idx = a->j + a->i[i] + 1; 101 v = a->a + a->i[i] + 1; 102 103 ierr = VecSet(left,0.0);CHKERRQ(ierr); 104 for (j=0; j<n; j++) { 105 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 106 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 107 ierr = VecResetArray(right);CHKERRQ(ierr); 108 } 109 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 110 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 111 ierr = VecResetArray(right);CHKERRQ(ierr); 112 113 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 114 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 115 ierr = VecResetArray(right);CHKERRQ(ierr); 116 117 } 118 } 119 } 120 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 121 ierr = VecRestoreArrayRead(a->workb,&b);CHKERRQ(ierr); 122 PetscFunctionReturn(0); 123 } 124 125 static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 126 { 127 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 128 PetscScalar *x; 129 const Mat *v; 130 const PetscScalar *b; 131 PetscErrorCode ierr; 132 PetscInt n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs; 133 const PetscInt *idx; 134 IS row,col; 135 MatFactorInfo info; 136 Vec left = a->left,right = a->right; 137 Mat *diag; 138 139 PetscFunctionBegin; 140 its = its*lits; 141 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0"); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift"); 145 146 if (!a->diags) { 147 ierr = PetscMalloc1(mbs,&a->diags);CHKERRQ(ierr); 148 ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr); 149 for (i=0; i<mbs; i++) { 150 ierr = MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);CHKERRQ(ierr); 151 ierr = MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info);CHKERRQ(ierr); 152 ierr = MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);CHKERRQ(ierr); 153 ierr = ISDestroy(&row);CHKERRQ(ierr); 154 ierr = ISDestroy(&col);CHKERRQ(ierr); 155 } 156 } 157 diag = a->diags; 158 159 ierr = VecSet(xx,0.0);CHKERRQ(ierr); 160 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 161 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 162 163 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 164 while (its--) { 165 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 166 167 for (i=0; i<mbs; i++) { 168 n = a->i[i+1] - a->i[i]; 169 idx = a->j + a->i[i]; 170 v = a->a + a->i[i]; 171 172 ierr = VecSet(left,0.0);CHKERRQ(ierr); 173 for (j=0; j<n; j++) { 174 if (idx[j] != i) { 175 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 176 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 177 ierr = VecResetArray(right);CHKERRQ(ierr); 178 } 179 } 180 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 181 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 182 ierr = VecResetArray(right);CHKERRQ(ierr); 183 184 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 185 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 186 ierr = VecResetArray(right);CHKERRQ(ierr); 187 } 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 190 191 for (i=mbs-1; i>=0; i--) { 192 n = a->i[i+1] - a->i[i]; 193 idx = a->j + a->i[i]; 194 v = a->a + a->i[i]; 195 196 ierr = VecSet(left,0.0);CHKERRQ(ierr); 197 for (j=0; j<n; j++) { 198 if (idx[j] != i) { 199 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 200 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 201 ierr = VecResetArray(right);CHKERRQ(ierr); 202 } 203 } 204 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 205 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 206 ierr = VecResetArray(right);CHKERRQ(ierr); 207 208 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 209 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 210 ierr = VecResetArray(right);CHKERRQ(ierr); 211 212 } 213 } 214 } 215 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 216 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 217 PetscFunctionReturn(0); 218 } 219 220 static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 221 { 222 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 223 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 224 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 225 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 226 PetscErrorCode ierr; 227 PetscInt ridx,cidx; 228 PetscBool roworiented=a->roworiented; 229 MatScalar value; 230 Mat *ap,*aa = a->a; 231 232 PetscFunctionBegin; 233 for (k=0; k<m; k++) { /* loop over added rows */ 234 row = im[k]; 235 brow = row/bs; 236 if (row < 0) continue; 237 #if defined(PETSC_USE_DEBUG) 238 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 239 #endif 240 rp = aj + ai[brow]; 241 ap = aa + ai[brow]; 242 rmax = imax[brow]; 243 nrow = ailen[brow]; 244 low = 0; 245 high = nrow; 246 for (l=0; l<n; l++) { /* loop over added columns */ 247 if (in[l] < 0) continue; 248 #if defined(PETSC_USE_DEBUG) 249 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 250 #endif 251 col = in[l]; bcol = col/bs; 252 if (A->symmetric && brow > bcol) continue; 253 ridx = row % bs; cidx = col % bs; 254 if (roworiented) value = v[l + k*n]; 255 else value = v[k + l*m]; 256 257 if (col <= lastcol) low = 0; 258 else high = nrow; 259 lastcol = col; 260 while (high-low > 7) { 261 t = (low+high)/2; 262 if (rp[t] > bcol) high = t; 263 else low = t; 264 } 265 for (i=low; i<high; i++) { 266 if (rp[i] > bcol) break; 267 if (rp[i] == bcol) goto noinsert1; 268 } 269 if (nonew == 1) goto noinsert1; 270 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 271 MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat); 272 N = nrow++ - 1; high++; 273 /* shift up all the later entries in this row */ 274 for (ii=N; ii>=i; ii--) { 275 rp[ii+1] = rp[ii]; 276 ap[ii+1] = ap[ii]; 277 } 278 if (N>=i) ap[i] = 0; 279 rp[i] = bcol; 280 a->nz++; 281 A->nonzerostate++; 282 noinsert1:; 283 if (!*(ap+i)) { 284 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,0,ap+i);CHKERRQ(ierr); 285 } 286 ierr = MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);CHKERRQ(ierr); 287 low = i; 288 } 289 ailen[brow] = nrow; 290 } 291 PetscFunctionReturn(0); 292 } 293 294 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) 295 { 296 PetscErrorCode ierr; 297 Mat tmpA; 298 PetscInt i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0; 299 const PetscInt *cols; 300 const PetscScalar *values; 301 PetscBool flg = PETSC_FALSE,notdone; 302 Mat_SeqAIJ *a; 303 Mat_BlockMat *amat; 304 305 PetscFunctionBegin; 306 /* force binary viewer to load .info file if it has not yet done so */ 307 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 308 ierr = MatCreate(PETSC_COMM_SELF,&tmpA);CHKERRQ(ierr); 309 ierr = MatSetType(tmpA,MATSEQAIJ);CHKERRQ(ierr); 310 ierr = MatLoad_SeqAIJ(tmpA,viewer);CHKERRQ(ierr); 311 312 ierr = MatGetLocalSize(tmpA,&m,&n);CHKERRQ(ierr); 313 ierr = PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");CHKERRQ(ierr); 314 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 315 ierr = PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL);CHKERRQ(ierr); 316 ierr = PetscOptionsEnd();CHKERRQ(ierr); 317 318 /* Determine number of nonzero blocks for each block row */ 319 a = (Mat_SeqAIJ*) tmpA->data; 320 mbs = m/bs; 321 ierr = PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens);CHKERRQ(ierr); 322 ierr = PetscMemzero(lens,mbs*sizeof(PetscInt));CHKERRQ(ierr); 323 324 for (i=0; i<mbs; i++) { 325 for (j=0; j<bs; j++) { 326 ii[j] = a->j + a->i[i*bs + j]; 327 ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j]; 328 } 329 330 currentcol = -1; 331 while (PETSC_TRUE) { 332 notdone = PETSC_FALSE; 333 nextcol = 1000000000; 334 for (j=0; j<bs; j++) { 335 while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { 336 ii[j]++; 337 ilens[j]--; 338 } 339 if (ilens[j] > 0) { 340 notdone = PETSC_TRUE; 341 nextcol = PetscMin(nextcol,ii[j][0]/bs); 342 } 343 } 344 if (!notdone) break; 345 if (!flg || (nextcol >= i)) lens[i]++; 346 currentcol = nextcol; 347 } 348 } 349 350 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 351 ierr = MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 352 } 353 ierr = MatBlockMatSetPreallocation(newmat,bs,0,lens);CHKERRQ(ierr); 354 if (flg) { 355 ierr = MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 356 } 357 amat = (Mat_BlockMat*)(newmat)->data; 358 359 /* preallocate the submatrices */ 360 ierr = PetscMalloc1(bs,&llens);CHKERRQ(ierr); 361 for (i=0; i<mbs; i++) { /* loops for block rows */ 362 for (j=0; j<bs; j++) { 363 ii[j] = a->j + a->i[i*bs + j]; 364 ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j]; 365 } 366 367 currentcol = 1000000000; 368 for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */ 369 if (ilens[j] > 0) { 370 currentcol = PetscMin(currentcol,ii[j][0]/bs); 371 } 372 } 373 374 while (PETSC_TRUE) { /* loops over blocks in block row */ 375 notdone = PETSC_FALSE; 376 nextcol = 1000000000; 377 ierr = PetscMemzero(llens,bs*sizeof(PetscInt));CHKERRQ(ierr); 378 for (j=0; j<bs; j++) { /* loop over rows in block */ 379 while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */ 380 ii[j]++; 381 ilens[j]--; 382 llens[j]++; 383 } 384 if (ilens[j] > 0) { 385 notdone = PETSC_TRUE; 386 nextcol = PetscMin(nextcol,ii[j][0]/bs); 387 } 388 } 389 if (cnt >= amat->maxnz) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %D",cnt); 390 if (!flg || currentcol >= i) { 391 amat->j[cnt] = currentcol; 392 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++);CHKERRQ(ierr); 393 } 394 395 if (!notdone) break; 396 currentcol = nextcol; 397 } 398 amat->ilen[i] = lens[i]; 399 } 400 401 ierr = PetscFree3(lens,ii,ilens);CHKERRQ(ierr); 402 ierr = PetscFree(llens);CHKERRQ(ierr); 403 404 /* copy over the matrix, one row at a time */ 405 for (i=0; i<m; i++) { 406 ierr = MatGetRow(tmpA,i,&ncols,&cols,&values);CHKERRQ(ierr); 407 ierr = MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES);CHKERRQ(ierr); 408 ierr = MatRestoreRow(tmpA,i,&ncols,&cols,&values);CHKERRQ(ierr); 409 } 410 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 411 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer) 416 { 417 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 418 PetscErrorCode ierr; 419 const char *name; 420 PetscViewerFormat format; 421 422 PetscFunctionBegin; 423 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 424 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 425 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 426 ierr = PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %D \n",a->nz);CHKERRQ(ierr); 427 if (A->symmetric) { 428 ierr = PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n");CHKERRQ(ierr); 429 } 430 } 431 PetscFunctionReturn(0); 432 } 433 434 static PetscErrorCode MatDestroy_BlockMat(Mat mat) 435 { 436 PetscErrorCode ierr; 437 Mat_BlockMat *bmat = (Mat_BlockMat*)mat->data; 438 PetscInt i; 439 440 PetscFunctionBegin; 441 ierr = VecDestroy(&bmat->right);CHKERRQ(ierr); 442 ierr = VecDestroy(&bmat->left);CHKERRQ(ierr); 443 ierr = VecDestroy(&bmat->middle);CHKERRQ(ierr); 444 ierr = VecDestroy(&bmat->workb);CHKERRQ(ierr); 445 if (bmat->diags) { 446 for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) { 447 ierr = MatDestroy(&bmat->diags[i]);CHKERRQ(ierr); 448 } 449 } 450 if (bmat->a) { 451 for (i=0; i<bmat->nz; i++) { 452 ierr = MatDestroy(&bmat->a[i]);CHKERRQ(ierr); 453 } 454 } 455 ierr = MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);CHKERRQ(ierr); 456 ierr = PetscFree(mat->data);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y) 461 { 462 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 463 PetscErrorCode ierr; 464 PetscScalar *xx,*yy; 465 PetscInt *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j; 466 Mat *aa; 467 468 PetscFunctionBegin; 469 /* 470 Standard CSR multiply except each entry is a Mat 471 */ 472 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 473 474 ierr = VecSet(y,0.0);CHKERRQ(ierr); 475 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 476 aj = bmat->j; 477 aa = bmat->a; 478 ii = bmat->i; 479 for (i=0; i<m; i++) { 480 jrow = ii[i]; 481 ierr = VecPlaceArray(bmat->left,yy + bs*i);CHKERRQ(ierr); 482 n = ii[i+1] - jrow; 483 for (j=0; j<n; j++) { 484 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); 485 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 486 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 487 jrow++; 488 } 489 ierr = VecResetArray(bmat->left);CHKERRQ(ierr); 490 } 491 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 492 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y) 497 { 498 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 499 PetscErrorCode ierr; 500 PetscScalar *xx,*yy; 501 PetscInt *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j; 502 Mat *aa; 503 504 PetscFunctionBegin; 505 /* 506 Standard CSR multiply except each entry is a Mat 507 */ 508 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 509 510 ierr = VecSet(y,0.0);CHKERRQ(ierr); 511 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 512 aj = bmat->j; 513 aa = bmat->a; 514 ii = bmat->i; 515 for (i=0; i<m; i++) { 516 jrow = ii[i]; 517 n = ii[i+1] - jrow; 518 ierr = VecPlaceArray(bmat->left,yy + bs*i);CHKERRQ(ierr); 519 ierr = VecPlaceArray(bmat->middle,xx + bs*i);CHKERRQ(ierr); 520 /* if we ALWAYS required a diagonal entry then could remove this if test */ 521 if (aj[jrow] == i) { 522 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); 523 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 524 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 525 jrow++; 526 n--; 527 } 528 for (j=0; j<n; j++) { 529 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); /* upper triangular part */ 530 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 531 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 532 533 ierr = VecPlaceArray(bmat->right,yy + bs*aj[jrow]);CHKERRQ(ierr); /* lower triangular part */ 534 ierr = MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);CHKERRQ(ierr); 535 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 536 jrow++; 537 } 538 ierr = VecResetArray(bmat->left);CHKERRQ(ierr); 539 ierr = VecResetArray(bmat->middle);CHKERRQ(ierr); 540 } 541 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 542 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 546 static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z) 547 { 548 PetscFunctionBegin; 549 PetscFunctionReturn(0); 550 } 551 552 static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y) 553 { 554 PetscFunctionBegin; 555 PetscFunctionReturn(0); 556 } 557 558 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z) 559 { 560 PetscFunctionBegin; 561 PetscFunctionReturn(0); 562 } 563 564 /* 565 Adds diagonal pointers to sparse matrix structure. 566 */ 567 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A) 568 { 569 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 570 PetscErrorCode ierr; 571 PetscInt i,j,mbs = A->rmap->n/A->rmap->bs; 572 573 PetscFunctionBegin; 574 if (!a->diag) { 575 ierr = PetscMalloc1(mbs,&a->diag);CHKERRQ(ierr); 576 } 577 for (i=0; i<mbs; i++) { 578 a->diag[i] = a->i[i+1]; 579 for (j=a->i[i]; j<a->i[i+1]; j++) { 580 if (a->j[j] == i) { 581 a->diag[i] = j; 582 break; 583 } 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588 589 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 590 { 591 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 592 Mat_SeqAIJ *c; 593 PetscErrorCode ierr; 594 PetscInt i,k,first,step,lensi,nrows,ncols; 595 PetscInt *j_new,*i_new,*aj = a->j,*ailen = a->ilen; 596 PetscScalar *a_new; 597 Mat C,*aa = a->a; 598 PetscBool stride,equal; 599 600 PetscFunctionBegin; 601 ierr = ISEqual(isrow,iscol,&equal);CHKERRQ(ierr); 602 if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for idential column and row indices"); 603 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride);CHKERRQ(ierr); 604 if (!stride) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices"); 605 ierr = ISStrideGetInfo(iscol,&first,&step);CHKERRQ(ierr); 606 if (step != A->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block"); 607 608 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 609 ncols = nrows; 610 611 /* create submatrix */ 612 if (scall == MAT_REUSE_MATRIX) { 613 PetscInt n_cols,n_rows; 614 C = *B; 615 ierr = MatGetSize(C,&n_rows,&n_cols);CHKERRQ(ierr); 616 if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); 617 ierr = MatZeroEntries(C);CHKERRQ(ierr); 618 } else { 619 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 620 ierr = MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 621 if (A->symmetric) { 622 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 623 } else { 624 ierr = MatSetType(C,MATSEQAIJ);CHKERRQ(ierr); 625 } 626 ierr = MatSeqAIJSetPreallocation(C,0,ailen);CHKERRQ(ierr); 627 ierr = MatSeqSBAIJSetPreallocation(C,1,0,ailen);CHKERRQ(ierr); 628 } 629 c = (Mat_SeqAIJ*)C->data; 630 631 /* loop over rows inserting into submatrix */ 632 a_new = c->a; 633 j_new = c->j; 634 i_new = c->i; 635 636 for (i=0; i<nrows; i++) { 637 lensi = ailen[i]; 638 for (k=0; k<lensi; k++) { 639 *j_new++ = *aj++; 640 ierr = MatGetValue(*aa++,first,first,a_new++);CHKERRQ(ierr); 641 } 642 i_new[i+1] = i_new[i] + lensi; 643 c->ilen[i] = lensi; 644 } 645 646 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 647 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 648 *B = C; 649 PetscFunctionReturn(0); 650 } 651 652 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode) 653 { 654 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 655 PetscErrorCode ierr; 656 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 657 PetscInt m = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0; 658 Mat *aa = a->a,*ap; 659 660 PetscFunctionBegin; 661 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 662 663 if (m) rmax = ailen[0]; /* determine row with most nonzeros */ 664 for (i=1; i<m; i++) { 665 /* move each row back by the amount of empty slots (fshift) before it*/ 666 fshift += imax[i-1] - ailen[i-1]; 667 rmax = PetscMax(rmax,ailen[i]); 668 if (fshift) { 669 ip = aj + ai[i]; 670 ap = aa + ai[i]; 671 N = ailen[i]; 672 for (j=0; j<N; j++) { 673 ip[j-fshift] = ip[j]; 674 ap[j-fshift] = ap[j]; 675 } 676 } 677 ai[i] = ai[i-1] + ailen[i-1]; 678 } 679 if (m) { 680 fshift += imax[m-1] - ailen[m-1]; 681 ai[m] = ai[m-1] + ailen[m-1]; 682 } 683 /* reset ilen and imax for each row */ 684 for (i=0; i<m; i++) { 685 ailen[i] = imax[i] = ai[i+1] - ai[i]; 686 } 687 a->nz = ai[m]; 688 for (i=0; i<a->nz; i++) { 689 #if defined(PETSC_USE_DEBUG) 690 if (!aa[i]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %D column %D nz %D",i,aj[i],a->nz); 691 #endif 692 ierr = MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } 695 ierr = PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz);CHKERRQ(ierr); 696 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 697 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);CHKERRQ(ierr); 698 699 A->info.mallocs += a->reallocs; 700 a->reallocs = 0; 701 A->info.nz_unneeded = (double)fshift; 702 a->rmax = rmax; 703 ierr = MatMarkDiagonal_BlockMat(A);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg) 708 { 709 PetscFunctionBegin; 710 if (opt == MAT_SYMMETRIC && flg) { 711 A->ops->sor = MatSOR_BlockMat_Symmetric; 712 A->ops->mult = MatMult_BlockMat_Symmetric; 713 } else { 714 PetscInfo1(A,"Unused matrix option %s\n",MatOptions[opt]); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 720 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat, 721 0, 722 0, 723 MatMult_BlockMat, 724 /* 4*/ MatMultAdd_BlockMat, 725 MatMultTranspose_BlockMat, 726 MatMultTransposeAdd_BlockMat, 727 0, 728 0, 729 0, 730 /* 10*/ 0, 731 0, 732 0, 733 MatSOR_BlockMat, 734 0, 735 /* 15*/ 0, 736 0, 737 0, 738 0, 739 0, 740 /* 20*/ 0, 741 MatAssemblyEnd_BlockMat, 742 MatSetOption_BlockMat, 743 0, 744 /* 24*/ 0, 745 0, 746 0, 747 0, 748 0, 749 /* 29*/ 0, 750 0, 751 0, 752 0, 753 0, 754 /* 34*/ 0, 755 0, 756 0, 757 0, 758 0, 759 /* 39*/ 0, 760 0, 761 0, 762 0, 763 0, 764 /* 44*/ 0, 765 0, 766 MatShift_Basic, 767 0, 768 0, 769 /* 49*/ 0, 770 0, 771 0, 772 0, 773 0, 774 /* 54*/ 0, 775 0, 776 0, 777 0, 778 0, 779 /* 59*/ MatCreateSubMatrix_BlockMat, 780 MatDestroy_BlockMat, 781 MatView_BlockMat, 782 0, 783 0, 784 /* 64*/ 0, 785 0, 786 0, 787 0, 788 0, 789 /* 69*/ 0, 790 0, 791 0, 792 0, 793 0, 794 /* 74*/ 0, 795 0, 796 0, 797 0, 798 0, 799 /* 79*/ 0, 800 0, 801 0, 802 0, 803 MatLoad_BlockMat, 804 /* 84*/ 0, 805 0, 806 0, 807 0, 808 0, 809 /* 89*/ 0, 810 0, 811 0, 812 0, 813 0, 814 /* 94*/ 0, 815 0, 816 0, 817 0, 818 0, 819 /* 99*/ 0, 820 0, 821 0, 822 0, 823 0, 824 /*104*/ 0, 825 0, 826 0, 827 0, 828 0, 829 /*109*/ 0, 830 0, 831 0, 832 0, 833 0, 834 /*114*/ 0, 835 0, 836 0, 837 0, 838 0, 839 /*119*/ 0, 840 0, 841 0, 842 0, 843 0, 844 /*124*/ 0, 845 0, 846 0, 847 0, 848 0, 849 /*129*/ 0, 850 0, 851 0, 852 0, 853 0, 854 /*134*/ 0, 855 0, 856 0, 857 0, 858 0, 859 /*139*/ 0, 860 0, 861 0 862 }; 863 864 /*@C 865 MatBlockMatSetPreallocation - For good matrix assembly performance 866 the user should preallocate the matrix storage by setting the parameter nz 867 (or the array nnz). By setting these parameters accurately, performance 868 during matrix assembly can be increased by more than a factor of 50. 869 870 Collective on MPI_Comm 871 872 Input Parameters: 873 + B - The matrix 874 . bs - size of each block in matrix 875 . nz - number of nonzeros per block row (same for all rows) 876 - nnz - array containing the number of nonzeros in the various block rows 877 (possibly different for each row) or NULL 878 879 Notes: 880 If nnz is given then nz is ignored 881 882 Specify the preallocated storage with either nz or nnz (not both). 883 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 884 allocation. For large problems you MUST preallocate memory or you 885 will get TERRIBLE performance, see the users' manual chapter on matrices. 886 887 Level: intermediate 888 889 .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues() 890 891 @*/ 892 PetscErrorCode MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 893 { 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 ierr = PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz) 902 { 903 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 904 PetscErrorCode ierr; 905 PetscInt i; 906 907 PetscFunctionBegin; 908 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 909 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 910 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 911 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 912 ierr = PetscLayoutGetBlockSize(A->rmap,&bs);CHKERRQ(ierr); 913 914 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 915 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 916 if (nnz) { 917 for (i=0; i<A->rmap->n/bs; i++) { 918 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 919 if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap->n/bs); 920 } 921 } 922 bmat->mbs = A->rmap->n/bs; 923 924 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right);CHKERRQ(ierr); 925 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle);CHKERRQ(ierr); 926 ierr = VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);CHKERRQ(ierr); 927 928 if (!bmat->imax) { 929 ierr = PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen);CHKERRQ(ierr); 930 ierr = PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 931 } 932 if (nnz) { 933 nz = 0; 934 for (i=0; i<A->rmap->n/A->rmap->bs; i++) { 935 bmat->imax[i] = nnz[i]; 936 nz += nnz[i]; 937 } 938 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation"); 939 940 /* bmat->ilen will count nonzeros in each row so far. */ 941 for (i=0; i<bmat->mbs; i++) bmat->ilen[i] = 0; 942 943 /* allocate the matrix space */ 944 ierr = MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);CHKERRQ(ierr); 945 ierr = PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i);CHKERRQ(ierr); 946 ierr = PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 947 bmat->i[0] = 0; 948 for (i=1; i<bmat->mbs+1; i++) { 949 bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1]; 950 } 951 bmat->singlemalloc = PETSC_TRUE; 952 bmat->free_a = PETSC_TRUE; 953 bmat->free_ij = PETSC_TRUE; 954 955 bmat->nz = 0; 956 bmat->maxnz = nz; 957 A->info.nz_unneeded = (double)bmat->maxnz; 958 ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 /*MC 963 MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix 964 consisting of (usually) sparse blocks. 965 966 Level: advanced 967 968 .seealso: MatCreateBlockMat() 969 970 M*/ 971 972 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A) 973 { 974 Mat_BlockMat *b; 975 PetscErrorCode ierr; 976 977 PetscFunctionBegin; 978 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 979 A->data = (void*)b; 980 ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 981 982 A->assembled = PETSC_TRUE; 983 A->preallocated = PETSC_FALSE; 984 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);CHKERRQ(ierr); 985 986 ierr = PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 /*@C 991 MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object 992 993 Collective on MPI_Comm 994 995 Input Parameters: 996 + comm - MPI communicator 997 . m - number of rows 998 . n - number of columns 999 . bs - size of each submatrix 1000 . nz - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known) 1001 - nnz - expected number of nonzers per block row if known (use NULL otherwise) 1002 1003 1004 Output Parameter: 1005 . A - the matrix 1006 1007 Level: intermediate 1008 1009 Notes: Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object. Each Mat must 1010 have the same size and be sequential. The local and global sizes must be compatible with this decomposition. 1011 1012 For matrices containing parallel submatrices and variable block sizes, see MATNEST. 1013 1014 .keywords: matrix, bmat, create 1015 1016 .seealso: MATBLOCKMAT, MatCreateNest() 1017 @*/ 1018 PetscErrorCode MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A) 1019 { 1020 PetscErrorCode ierr; 1021 1022 PetscFunctionBegin; 1023 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1024 ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1025 ierr = MatSetType(*A,MATBLOCKMAT);CHKERRQ(ierr); 1026 ierr = MatBlockMatSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029