1 2 /* 3 This provides a matrix that consists of Mats 4 */ 5 6 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */ 8 9 typedef struct { 10 SEQAIJHEADER(Mat); 11 SEQBAIJHEADER; 12 Mat *diags; 13 14 Vec left,right,middle,workb; /* dummy vectors to perform local parts of product */ 15 } Mat_BlockMat; 16 17 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 18 { 19 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 20 PetscScalar *x; 21 const Mat *v; 22 const PetscScalar *b; 23 PetscErrorCode ierr; 24 PetscInt n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs; 25 const PetscInt *idx; 26 IS row,col; 27 MatFactorInfo info; 28 Vec left = a->left,right = a->right, middle = a->middle; 29 Mat *diag; 30 31 PetscFunctionBegin; 32 its = its*lits; 33 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 34 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 35 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0"); 36 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift"); 37 if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) { 38 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep"); 39 } 40 41 if (!a->diags) { 42 ierr = PetscMalloc1(mbs,&a->diags);CHKERRQ(ierr); 43 ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr); 44 for (i=0; i<mbs; i++) { 45 ierr = MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);CHKERRQ(ierr); 46 ierr = MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info);CHKERRQ(ierr); 47 ierr = MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);CHKERRQ(ierr); 48 ierr = ISDestroy(&row);CHKERRQ(ierr); 49 ierr = ISDestroy(&col);CHKERRQ(ierr); 50 } 51 ierr = VecDuplicate(bb,&a->workb);CHKERRQ(ierr); 52 } 53 diag = a->diags; 54 55 ierr = VecSet(xx,0.0);CHKERRQ(ierr); 56 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 57 /* copy right hand side because it must be modified during iteration */ 58 ierr = VecCopy(bb,a->workb);CHKERRQ(ierr); 59 ierr = VecGetArrayRead(a->workb,&b);CHKERRQ(ierr); 60 61 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 62 while (its--) { 63 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 64 65 for (i=0; i<mbs; i++) { 66 n = a->i[i+1] - a->i[i] - 1; 67 idx = a->j + a->i[i] + 1; 68 v = a->a + a->i[i] + 1; 69 70 ierr = VecSet(left,0.0);CHKERRQ(ierr); 71 for (j=0; j<n; j++) { 72 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 73 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 74 ierr = VecResetArray(right);CHKERRQ(ierr); 75 } 76 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 77 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 78 ierr = VecResetArray(right);CHKERRQ(ierr); 79 80 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 81 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 82 83 /* now adjust right hand side, see MatSOR_SeqSBAIJ */ 84 for (j=0; j<n; j++) { 85 ierr = MatMultTranspose(v[j],right,left);CHKERRQ(ierr); 86 ierr = VecPlaceArray(middle,b + idx[j]*bs);CHKERRQ(ierr); 87 ierr = VecAXPY(middle,-1.0,left);CHKERRQ(ierr); 88 ierr = VecResetArray(middle);CHKERRQ(ierr); 89 } 90 ierr = VecResetArray(right);CHKERRQ(ierr); 91 92 } 93 } 94 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 95 96 for (i=mbs-1; i>=0; i--) { 97 n = a->i[i+1] - a->i[i] - 1; 98 idx = a->j + a->i[i] + 1; 99 v = a->a + a->i[i] + 1; 100 101 ierr = VecSet(left,0.0);CHKERRQ(ierr); 102 for (j=0; j<n; j++) { 103 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 104 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 105 ierr = VecResetArray(right);CHKERRQ(ierr); 106 } 107 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 108 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 109 ierr = VecResetArray(right);CHKERRQ(ierr); 110 111 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 112 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 113 ierr = VecResetArray(right);CHKERRQ(ierr); 114 115 } 116 } 117 } 118 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 119 ierr = VecRestoreArrayRead(a->workb,&b);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 124 { 125 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 126 PetscScalar *x; 127 const Mat *v; 128 const PetscScalar *b; 129 PetscErrorCode ierr; 130 PetscInt n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs; 131 const PetscInt *idx; 132 IS row,col; 133 MatFactorInfo info; 134 Vec left = a->left,right = a->right; 135 Mat *diag; 136 137 PetscFunctionBegin; 138 its = its*lits; 139 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0"); 142 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift"); 143 144 if (!a->diags) { 145 ierr = PetscMalloc1(mbs,&a->diags);CHKERRQ(ierr); 146 ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr); 147 for (i=0; i<mbs; i++) { 148 ierr = MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);CHKERRQ(ierr); 149 ierr = MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info);CHKERRQ(ierr); 150 ierr = MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);CHKERRQ(ierr); 151 ierr = ISDestroy(&row);CHKERRQ(ierr); 152 ierr = ISDestroy(&col);CHKERRQ(ierr); 153 } 154 } 155 diag = a->diags; 156 157 ierr = VecSet(xx,0.0);CHKERRQ(ierr); 158 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 159 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 160 161 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 162 while (its--) { 163 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 164 165 for (i=0; i<mbs; i++) { 166 n = a->i[i+1] - a->i[i]; 167 idx = a->j + a->i[i]; 168 v = a->a + a->i[i]; 169 170 ierr = VecSet(left,0.0);CHKERRQ(ierr); 171 for (j=0; j<n; j++) { 172 if (idx[j] != i) { 173 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 174 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 175 ierr = VecResetArray(right);CHKERRQ(ierr); 176 } 177 } 178 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 179 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 180 ierr = VecResetArray(right);CHKERRQ(ierr); 181 182 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 183 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 184 ierr = VecResetArray(right);CHKERRQ(ierr); 185 } 186 } 187 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 188 189 for (i=mbs-1; i>=0; i--) { 190 n = a->i[i+1] - a->i[i]; 191 idx = a->j + a->i[i]; 192 v = a->a + a->i[i]; 193 194 ierr = VecSet(left,0.0);CHKERRQ(ierr); 195 for (j=0; j<n; j++) { 196 if (idx[j] != i) { 197 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 198 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 199 ierr = VecResetArray(right);CHKERRQ(ierr); 200 } 201 } 202 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 203 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 204 ierr = VecResetArray(right);CHKERRQ(ierr); 205 206 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 207 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 208 ierr = VecResetArray(right);CHKERRQ(ierr); 209 210 } 211 } 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 219 { 220 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 221 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 222 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 223 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 224 PetscErrorCode ierr; 225 PetscInt ridx,cidx; 226 PetscBool roworiented=a->roworiented; 227 MatScalar value; 228 Mat *ap,*aa = a->a; 229 230 PetscFunctionBegin; 231 for (k=0; k<m; k++) { /* loop over added rows */ 232 row = im[k]; 233 brow = row/bs; 234 if (row < 0) continue; 235 #if defined(PETSC_USE_DEBUG) 236 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 237 #endif 238 rp = aj + ai[brow]; 239 ap = aa + ai[brow]; 240 rmax = imax[brow]; 241 nrow = ailen[brow]; 242 low = 0; 243 high = nrow; 244 for (l=0; l<n; l++) { /* loop over added columns */ 245 if (in[l] < 0) continue; 246 #if defined(PETSC_USE_DEBUG) 247 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 248 #endif 249 col = in[l]; bcol = col/bs; 250 if (A->symmetric && brow > bcol) continue; 251 ridx = row % bs; cidx = col % bs; 252 if (roworiented) value = v[l + k*n]; 253 else value = v[k + l*m]; 254 255 if (col <= lastcol) low = 0; 256 else high = nrow; 257 lastcol = col; 258 while (high-low > 7) { 259 t = (low+high)/2; 260 if (rp[t] > bcol) high = t; 261 else low = t; 262 } 263 for (i=low; i<high; i++) { 264 if (rp[i] > bcol) break; 265 if (rp[i] == bcol) goto noinsert1; 266 } 267 if (nonew == 1) goto noinsert1; 268 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 269 MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat); 270 N = nrow++ - 1; high++; 271 /* shift up all the later entries in this row */ 272 for (ii=N; ii>=i; ii--) { 273 rp[ii+1] = rp[ii]; 274 ap[ii+1] = ap[ii]; 275 } 276 if (N>=i) ap[i] = 0; 277 rp[i] = bcol; 278 a->nz++; 279 A->nonzerostate++; 280 noinsert1:; 281 if (!*(ap+i)) { 282 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,0,ap+i);CHKERRQ(ierr); 283 } 284 ierr = MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);CHKERRQ(ierr); 285 low = i; 286 } 287 ailen[brow] = nrow; 288 } 289 PetscFunctionReturn(0); 290 } 291 292 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) 293 { 294 PetscErrorCode ierr; 295 Mat tmpA; 296 PetscInt i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0; 297 const PetscInt *cols; 298 const PetscScalar *values; 299 PetscBool flg = PETSC_FALSE,notdone; 300 Mat_SeqAIJ *a; 301 Mat_BlockMat *amat; 302 303 PetscFunctionBegin; 304 /* force binary viewer to load .info file if it has not yet done so */ 305 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 306 ierr = MatCreate(PETSC_COMM_SELF,&tmpA);CHKERRQ(ierr); 307 ierr = MatSetType(tmpA,MATSEQAIJ);CHKERRQ(ierr); 308 ierr = MatLoad_SeqAIJ(tmpA,viewer);CHKERRQ(ierr); 309 310 ierr = MatGetLocalSize(tmpA,&m,&n);CHKERRQ(ierr); 311 ierr = PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");CHKERRQ(ierr); 312 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 313 ierr = PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL);CHKERRQ(ierr); 314 ierr = PetscOptionsEnd();CHKERRQ(ierr); 315 316 /* Determine number of nonzero blocks for each block row */ 317 a = (Mat_SeqAIJ*) tmpA->data; 318 mbs = m/bs; 319 ierr = PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens);CHKERRQ(ierr); 320 ierr = PetscArrayzero(lens,mbs);CHKERRQ(ierr); 321 322 for (i=0; i<mbs; i++) { 323 for (j=0; j<bs; j++) { 324 ii[j] = a->j + a->i[i*bs + j]; 325 ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j]; 326 } 327 328 currentcol = -1; 329 while (PETSC_TRUE) { 330 notdone = PETSC_FALSE; 331 nextcol = 1000000000; 332 for (j=0; j<bs; j++) { 333 while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { 334 ii[j]++; 335 ilens[j]--; 336 } 337 if (ilens[j] > 0) { 338 notdone = PETSC_TRUE; 339 nextcol = PetscMin(nextcol,ii[j][0]/bs); 340 } 341 } 342 if (!notdone) break; 343 if (!flg || (nextcol >= i)) lens[i]++; 344 currentcol = nextcol; 345 } 346 } 347 348 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 349 ierr = MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 350 } 351 ierr = MatBlockMatSetPreallocation(newmat,bs,0,lens);CHKERRQ(ierr); 352 if (flg) { 353 ierr = MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 354 } 355 amat = (Mat_BlockMat*)(newmat)->data; 356 357 /* preallocate the submatrices */ 358 ierr = PetscMalloc1(bs,&llens);CHKERRQ(ierr); 359 for (i=0; i<mbs; i++) { /* loops for block rows */ 360 for (j=0; j<bs; j++) { 361 ii[j] = a->j + a->i[i*bs + j]; 362 ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j]; 363 } 364 365 currentcol = 1000000000; 366 for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */ 367 if (ilens[j] > 0) { 368 currentcol = PetscMin(currentcol,ii[j][0]/bs); 369 } 370 } 371 372 while (PETSC_TRUE) { /* loops over blocks in block row */ 373 notdone = PETSC_FALSE; 374 nextcol = 1000000000; 375 ierr = PetscArrayzero(llens,bs);CHKERRQ(ierr); 376 for (j=0; j<bs; j++) { /* loop over rows in block */ 377 while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */ 378 ii[j]++; 379 ilens[j]--; 380 llens[j]++; 381 } 382 if (ilens[j] > 0) { 383 notdone = PETSC_TRUE; 384 nextcol = PetscMin(nextcol,ii[j][0]/bs); 385 } 386 } 387 if (cnt >= amat->maxnz) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %D",cnt); 388 if (!flg || currentcol >= i) { 389 amat->j[cnt] = currentcol; 390 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++);CHKERRQ(ierr); 391 } 392 393 if (!notdone) break; 394 currentcol = nextcol; 395 } 396 amat->ilen[i] = lens[i]; 397 } 398 399 ierr = PetscFree3(lens,ii,ilens);CHKERRQ(ierr); 400 ierr = PetscFree(llens);CHKERRQ(ierr); 401 402 /* copy over the matrix, one row at a time */ 403 for (i=0; i<m; i++) { 404 ierr = MatGetRow(tmpA,i,&ncols,&cols,&values);CHKERRQ(ierr); 405 ierr = MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = MatRestoreRow(tmpA,i,&ncols,&cols,&values);CHKERRQ(ierr); 407 } 408 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 409 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer) 414 { 415 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 416 PetscErrorCode ierr; 417 const char *name; 418 PetscViewerFormat format; 419 420 PetscFunctionBegin; 421 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 422 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 423 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 424 ierr = PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %D \n",a->nz);CHKERRQ(ierr); 425 if (A->symmetric) { 426 ierr = PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n");CHKERRQ(ierr); 427 } 428 } 429 PetscFunctionReturn(0); 430 } 431 432 static PetscErrorCode MatDestroy_BlockMat(Mat mat) 433 { 434 PetscErrorCode ierr; 435 Mat_BlockMat *bmat = (Mat_BlockMat*)mat->data; 436 PetscInt i; 437 438 PetscFunctionBegin; 439 ierr = VecDestroy(&bmat->right);CHKERRQ(ierr); 440 ierr = VecDestroy(&bmat->left);CHKERRQ(ierr); 441 ierr = VecDestroy(&bmat->middle);CHKERRQ(ierr); 442 ierr = VecDestroy(&bmat->workb);CHKERRQ(ierr); 443 if (bmat->diags) { 444 for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) { 445 ierr = MatDestroy(&bmat->diags[i]);CHKERRQ(ierr); 446 } 447 } 448 if (bmat->a) { 449 for (i=0; i<bmat->nz; i++) { 450 ierr = MatDestroy(&bmat->a[i]);CHKERRQ(ierr); 451 } 452 } 453 ierr = MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);CHKERRQ(ierr); 454 ierr = PetscFree(mat->data);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y) 459 { 460 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 461 PetscErrorCode ierr; 462 PetscScalar *xx,*yy; 463 PetscInt *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j; 464 Mat *aa; 465 466 PetscFunctionBegin; 467 /* 468 Standard CSR multiply except each entry is a Mat 469 */ 470 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 471 472 ierr = VecSet(y,0.0);CHKERRQ(ierr); 473 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 474 aj = bmat->j; 475 aa = bmat->a; 476 ii = bmat->i; 477 for (i=0; i<m; i++) { 478 jrow = ii[i]; 479 ierr = VecPlaceArray(bmat->left,yy + bs*i);CHKERRQ(ierr); 480 n = ii[i+1] - jrow; 481 for (j=0; j<n; j++) { 482 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); 483 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 484 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 485 jrow++; 486 } 487 ierr = VecResetArray(bmat->left);CHKERRQ(ierr); 488 } 489 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 490 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 494 PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y) 495 { 496 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 497 PetscErrorCode ierr; 498 PetscScalar *xx,*yy; 499 PetscInt *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j; 500 Mat *aa; 501 502 PetscFunctionBegin; 503 /* 504 Standard CSR multiply except each entry is a Mat 505 */ 506 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 507 508 ierr = VecSet(y,0.0);CHKERRQ(ierr); 509 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 510 aj = bmat->j; 511 aa = bmat->a; 512 ii = bmat->i; 513 for (i=0; i<m; i++) { 514 jrow = ii[i]; 515 n = ii[i+1] - jrow; 516 ierr = VecPlaceArray(bmat->left,yy + bs*i);CHKERRQ(ierr); 517 ierr = VecPlaceArray(bmat->middle,xx + bs*i);CHKERRQ(ierr); 518 /* if we ALWAYS required a diagonal entry then could remove this if test */ 519 if (aj[jrow] == i) { 520 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); 521 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 522 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 523 jrow++; 524 n--; 525 } 526 for (j=0; j<n; j++) { 527 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); /* upper triangular part */ 528 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 529 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 530 531 ierr = VecPlaceArray(bmat->right,yy + bs*aj[jrow]);CHKERRQ(ierr); /* lower triangular part */ 532 ierr = MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);CHKERRQ(ierr); 533 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 534 jrow++; 535 } 536 ierr = VecResetArray(bmat->left);CHKERRQ(ierr); 537 ierr = VecResetArray(bmat->middle);CHKERRQ(ierr); 538 } 539 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 540 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 541 PetscFunctionReturn(0); 542 } 543 544 static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z) 545 { 546 PetscFunctionBegin; 547 PetscFunctionReturn(0); 548 } 549 550 static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y) 551 { 552 PetscFunctionBegin; 553 PetscFunctionReturn(0); 554 } 555 556 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z) 557 { 558 PetscFunctionBegin; 559 PetscFunctionReturn(0); 560 } 561 562 /* 563 Adds diagonal pointers to sparse matrix structure. 564 */ 565 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A) 566 { 567 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 568 PetscErrorCode ierr; 569 PetscInt i,j,mbs = A->rmap->n/A->rmap->bs; 570 571 PetscFunctionBegin; 572 if (!a->diag) { 573 ierr = PetscMalloc1(mbs,&a->diag);CHKERRQ(ierr); 574 } 575 for (i=0; i<mbs; i++) { 576 a->diag[i] = a->i[i+1]; 577 for (j=a->i[i]; j<a->i[i+1]; j++) { 578 if (a->j[j] == i) { 579 a->diag[i] = j; 580 break; 581 } 582 } 583 } 584 PetscFunctionReturn(0); 585 } 586 587 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 588 { 589 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 590 Mat_SeqAIJ *c; 591 PetscErrorCode ierr; 592 PetscInt i,k,first,step,lensi,nrows,ncols; 593 PetscInt *j_new,*i_new,*aj = a->j,*ailen = a->ilen; 594 PetscScalar *a_new; 595 Mat C,*aa = a->a; 596 PetscBool stride,equal; 597 598 PetscFunctionBegin; 599 ierr = ISEqual(isrow,iscol,&equal);CHKERRQ(ierr); 600 if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for idential column and row indices"); 601 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride);CHKERRQ(ierr); 602 if (!stride) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices"); 603 ierr = ISStrideGetInfo(iscol,&first,&step);CHKERRQ(ierr); 604 if (step != A->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block"); 605 606 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 607 ncols = nrows; 608 609 /* create submatrix */ 610 if (scall == MAT_REUSE_MATRIX) { 611 PetscInt n_cols,n_rows; 612 C = *B; 613 ierr = MatGetSize(C,&n_rows,&n_cols);CHKERRQ(ierr); 614 if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); 615 ierr = MatZeroEntries(C);CHKERRQ(ierr); 616 } else { 617 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 618 ierr = MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 619 if (A->symmetric) { 620 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 621 } else { 622 ierr = MatSetType(C,MATSEQAIJ);CHKERRQ(ierr); 623 } 624 ierr = MatSeqAIJSetPreallocation(C,0,ailen);CHKERRQ(ierr); 625 ierr = MatSeqSBAIJSetPreallocation(C,1,0,ailen);CHKERRQ(ierr); 626 } 627 c = (Mat_SeqAIJ*)C->data; 628 629 /* loop over rows inserting into submatrix */ 630 a_new = c->a; 631 j_new = c->j; 632 i_new = c->i; 633 634 for (i=0; i<nrows; i++) { 635 lensi = ailen[i]; 636 for (k=0; k<lensi; k++) { 637 *j_new++ = *aj++; 638 ierr = MatGetValue(*aa++,first,first,a_new++);CHKERRQ(ierr); 639 } 640 i_new[i+1] = i_new[i] + lensi; 641 c->ilen[i] = lensi; 642 } 643 644 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 645 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 646 *B = C; 647 PetscFunctionReturn(0); 648 } 649 650 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode) 651 { 652 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 653 PetscErrorCode ierr; 654 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 655 PetscInt m = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0; 656 Mat *aa = a->a,*ap; 657 658 PetscFunctionBegin; 659 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 660 661 if (m) rmax = ailen[0]; /* determine row with most nonzeros */ 662 for (i=1; i<m; i++) { 663 /* move each row back by the amount of empty slots (fshift) before it*/ 664 fshift += imax[i-1] - ailen[i-1]; 665 rmax = PetscMax(rmax,ailen[i]); 666 if (fshift) { 667 ip = aj + ai[i]; 668 ap = aa + ai[i]; 669 N = ailen[i]; 670 for (j=0; j<N; j++) { 671 ip[j-fshift] = ip[j]; 672 ap[j-fshift] = ap[j]; 673 } 674 } 675 ai[i] = ai[i-1] + ailen[i-1]; 676 } 677 if (m) { 678 fshift += imax[m-1] - ailen[m-1]; 679 ai[m] = ai[m-1] + ailen[m-1]; 680 } 681 /* reset ilen and imax for each row */ 682 for (i=0; i<m; i++) { 683 ailen[i] = imax[i] = ai[i+1] - ai[i]; 684 } 685 a->nz = ai[m]; 686 for (i=0; i<a->nz; i++) { 687 #if defined(PETSC_USE_DEBUG) 688 if (!aa[i]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %D column %D nz %D",i,aj[i],a->nz); 689 #endif 690 ierr = MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 691 ierr = MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 692 } 693 ierr = PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz);CHKERRQ(ierr); 694 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 695 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);CHKERRQ(ierr); 696 697 A->info.mallocs += a->reallocs; 698 a->reallocs = 0; 699 A->info.nz_unneeded = (double)fshift; 700 a->rmax = rmax; 701 ierr = MatMarkDiagonal_BlockMat(A);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 705 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg) 706 { 707 PetscErrorCode ierr; 708 PetscFunctionBegin; 709 if (opt == MAT_SYMMETRIC && flg) { 710 A->ops->sor = MatSOR_BlockMat_Symmetric; 711 A->ops->mult = MatMult_BlockMat_Symmetric; 712 } else { 713 ierr = PetscInfo1(A,"Unused matrix option %s\n",MatOptions[opt]);CHKERRQ(ierr); 714 } 715 PetscFunctionReturn(0); 716 } 717 718 719 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat, 720 0, 721 0, 722 MatMult_BlockMat, 723 /* 4*/ MatMultAdd_BlockMat, 724 MatMultTranspose_BlockMat, 725 MatMultTransposeAdd_BlockMat, 726 0, 727 0, 728 0, 729 /* 10*/ 0, 730 0, 731 0, 732 MatSOR_BlockMat, 733 0, 734 /* 15*/ 0, 735 0, 736 0, 737 0, 738 0, 739 /* 20*/ 0, 740 MatAssemblyEnd_BlockMat, 741 MatSetOption_BlockMat, 742 0, 743 /* 24*/ 0, 744 0, 745 0, 746 0, 747 0, 748 /* 29*/ 0, 749 0, 750 0, 751 0, 752 0, 753 /* 34*/ 0, 754 0, 755 0, 756 0, 757 0, 758 /* 39*/ 0, 759 0, 760 0, 761 0, 762 0, 763 /* 44*/ 0, 764 0, 765 MatShift_Basic, 766 0, 767 0, 768 /* 49*/ 0, 769 0, 770 0, 771 0, 772 0, 773 /* 54*/ 0, 774 0, 775 0, 776 0, 777 0, 778 /* 59*/ MatCreateSubMatrix_BlockMat, 779 MatDestroy_BlockMat, 780 MatView_BlockMat, 781 0, 782 0, 783 /* 64*/ 0, 784 0, 785 0, 786 0, 787 0, 788 /* 69*/ 0, 789 0, 790 0, 791 0, 792 0, 793 /* 74*/ 0, 794 0, 795 0, 796 0, 797 0, 798 /* 79*/ 0, 799 0, 800 0, 801 0, 802 MatLoad_BlockMat, 803 /* 84*/ 0, 804 0, 805 0, 806 0, 807 0, 808 /* 89*/ 0, 809 0, 810 0, 811 0, 812 0, 813 /* 94*/ 0, 814 0, 815 0, 816 0, 817 0, 818 /* 99*/ 0, 819 0, 820 0, 821 0, 822 0, 823 /*104*/ 0, 824 0, 825 0, 826 0, 827 0, 828 /*109*/ 0, 829 0, 830 0, 831 0, 832 0, 833 /*114*/ 0, 834 0, 835 0, 836 0, 837 0, 838 /*119*/ 0, 839 0, 840 0, 841 0, 842 0, 843 /*124*/ 0, 844 0, 845 0, 846 0, 847 0, 848 /*129*/ 0, 849 0, 850 0, 851 0, 852 0, 853 /*134*/ 0, 854 0, 855 0, 856 0, 857 0, 858 /*139*/ 0, 859 0, 860 0 861 }; 862 863 /*@C 864 MatBlockMatSetPreallocation - For good matrix assembly performance 865 the user should preallocate the matrix storage by setting the parameter nz 866 (or the array nnz). By setting these parameters accurately, performance 867 during matrix assembly can be increased by more than a factor of 50. 868 869 Collective 870 871 Input Parameters: 872 + B - The matrix 873 . bs - size of each block in matrix 874 . nz - number of nonzeros per block row (same for all rows) 875 - nnz - array containing the number of nonzeros in the various block rows 876 (possibly different for each row) or NULL 877 878 Notes: 879 If nnz is given then nz is ignored 880 881 Specify the preallocated storage with either nz or nnz (not both). 882 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 883 allocation. For large problems you MUST preallocate memory or you 884 will get TERRIBLE performance, see the users' manual chapter on matrices. 885 886 Level: intermediate 887 888 .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues() 889 890 @*/ 891 PetscErrorCode MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 892 { 893 PetscErrorCode ierr; 894 895 PetscFunctionBegin; 896 ierr = PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz) 901 { 902 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 903 PetscErrorCode ierr; 904 PetscInt i; 905 906 PetscFunctionBegin; 907 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 908 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 909 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 910 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 911 ierr = PetscLayoutGetBlockSize(A->rmap,&bs);CHKERRQ(ierr); 912 913 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 914 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 915 if (nnz) { 916 for (i=0; i<A->rmap->n/bs; i++) { 917 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 918 if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap->n/bs); 919 } 920 } 921 bmat->mbs = A->rmap->n/bs; 922 923 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right);CHKERRQ(ierr); 924 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle);CHKERRQ(ierr); 925 ierr = VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);CHKERRQ(ierr); 926 927 if (!bmat->imax) { 928 ierr = PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen);CHKERRQ(ierr); 929 ierr = PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 930 } 931 if (nnz) { 932 nz = 0; 933 for (i=0; i<A->rmap->n/A->rmap->bs; i++) { 934 bmat->imax[i] = nnz[i]; 935 nz += nnz[i]; 936 } 937 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation"); 938 939 /* bmat->ilen will count nonzeros in each row so far. */ 940 for (i=0; i<bmat->mbs; i++) bmat->ilen[i] = 0; 941 942 /* allocate the matrix space */ 943 ierr = MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);CHKERRQ(ierr); 944 ierr = PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i);CHKERRQ(ierr); 945 ierr = PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 946 bmat->i[0] = 0; 947 for (i=1; i<bmat->mbs+1; i++) { 948 bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1]; 949 } 950 bmat->singlemalloc = PETSC_TRUE; 951 bmat->free_a = PETSC_TRUE; 952 bmat->free_ij = PETSC_TRUE; 953 954 bmat->nz = 0; 955 bmat->maxnz = nz; 956 A->info.nz_unneeded = (double)bmat->maxnz; 957 ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 /*MC 962 MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix 963 consisting of (usually) sparse blocks. 964 965 Level: advanced 966 967 .seealso: MatCreateBlockMat() 968 969 M*/ 970 971 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A) 972 { 973 Mat_BlockMat *b; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 978 A->data = (void*)b; 979 ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 980 981 A->assembled = PETSC_TRUE; 982 A->preallocated = PETSC_FALSE; 983 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);CHKERRQ(ierr); 984 985 ierr = PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 /*@C 990 MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object 991 992 Collective 993 994 Input Parameters: 995 + comm - MPI communicator 996 . m - number of rows 997 . n - number of columns 998 . bs - size of each submatrix 999 . nz - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known) 1000 - nnz - expected number of nonzers per block row if known (use NULL otherwise) 1001 1002 1003 Output Parameter: 1004 . A - the matrix 1005 1006 Level: intermediate 1007 1008 Notes: 1009 Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object. Each Mat must 1010 have the same size and be sequential. The local and global sizes must be compatible with this decomposition. 1011 1012 For matrices containing parallel submatrices and variable block sizes, see MATNEST. 1013 1014 .seealso: MATBLOCKMAT, MatCreateNest() 1015 @*/ 1016 PetscErrorCode MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A) 1017 { 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1022 ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1023 ierr = MatSetType(*A,MATBLOCKMAT);CHKERRQ(ierr); 1024 ierr = MatBlockMatSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1025 PetscFunctionReturn(0); 1026 } 1027