1 2 /* 3 This provides a matrix that consists of Mats 4 */ 5 6 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */ 8 9 typedef struct { 10 SEQAIJHEADER(Mat); 11 SEQBAIJHEADER; 12 Mat *diags; 13 14 Vec left,right,middle,workb; /* dummy vectors to perform local parts of product */ 15 } Mat_BlockMat; 16 17 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 18 { 19 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 20 PetscScalar *x; 21 const Mat *v; 22 const PetscScalar *b; 23 PetscInt n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs; 24 const PetscInt *idx; 25 IS row,col; 26 MatFactorInfo info; 27 Vec left = a->left,right = a->right, middle = a->middle; 28 Mat *diag; 29 30 PetscFunctionBegin; 31 its = its*lits; 32 PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 33 PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 34 PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0"); 35 PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift"); 36 if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) { 37 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep"); 38 } 39 40 if (!a->diags) { 41 PetscCall(PetscMalloc1(mbs,&a->diags)); 42 PetscCall(MatFactorInfoInitialize(&info)); 43 for (i=0; i<mbs; i++) { 44 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col)); 45 PetscCall(MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info)); 46 PetscCall(MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info)); 47 
PetscCall(ISDestroy(&row)); 48 PetscCall(ISDestroy(&col)); 49 } 50 PetscCall(VecDuplicate(bb,&a->workb)); 51 } 52 diag = a->diags; 53 54 PetscCall(VecSet(xx,0.0)); 55 PetscCall(VecGetArray(xx,&x)); 56 /* copy right hand side because it must be modified during iteration */ 57 PetscCall(VecCopy(bb,a->workb)); 58 PetscCall(VecGetArrayRead(a->workb,&b)); 59 60 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 61 while (its--) { 62 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 63 64 for (i=0; i<mbs; i++) { 65 n = a->i[i+1] - a->i[i] - 1; 66 idx = a->j + a->i[i] + 1; 67 v = a->a + a->i[i] + 1; 68 69 PetscCall(VecSet(left,0.0)); 70 for (j=0; j<n; j++) { 71 PetscCall(VecPlaceArray(right,x + idx[j]*bs)); 72 PetscCall(MatMultAdd(v[j],right,left,left)); 73 PetscCall(VecResetArray(right)); 74 } 75 PetscCall(VecPlaceArray(right,b + i*bs)); 76 PetscCall(VecAYPX(left,-1.0,right)); 77 PetscCall(VecResetArray(right)); 78 79 PetscCall(VecPlaceArray(right,x + i*bs)); 80 PetscCall(MatSolve(diag[i],left,right)); 81 82 /* now adjust right hand side, see MatSOR_SeqSBAIJ */ 83 for (j=0; j<n; j++) { 84 PetscCall(MatMultTranspose(v[j],right,left)); 85 PetscCall(VecPlaceArray(middle,b + idx[j]*bs)); 86 PetscCall(VecAXPY(middle,-1.0,left)); 87 PetscCall(VecResetArray(middle)); 88 } 89 PetscCall(VecResetArray(right)); 90 91 } 92 } 93 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 94 95 for (i=mbs-1; i>=0; i--) { 96 n = a->i[i+1] - a->i[i] - 1; 97 idx = a->j + a->i[i] + 1; 98 v = a->a + a->i[i] + 1; 99 100 PetscCall(VecSet(left,0.0)); 101 for (j=0; j<n; j++) { 102 PetscCall(VecPlaceArray(right,x + idx[j]*bs)); 103 PetscCall(MatMultAdd(v[j],right,left,left)); 104 PetscCall(VecResetArray(right)); 105 } 106 PetscCall(VecPlaceArray(right,b + i*bs)); 107 PetscCall(VecAYPX(left,-1.0,right)); 108 PetscCall(VecResetArray(right)); 109 110 PetscCall(VecPlaceArray(right,x + i*bs)); 111 PetscCall(MatSolve(diag[i],left,right)); 112 
PetscCall(VecResetArray(right)); 113 114 } 115 } 116 } 117 PetscCall(VecRestoreArray(xx,&x)); 118 PetscCall(VecRestoreArrayRead(a->workb,&b)); 119 PetscFunctionReturn(0); 120 } 121 122 static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 123 { 124 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 125 PetscScalar *x; 126 const Mat *v; 127 const PetscScalar *b; 128 PetscInt n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs; 129 const PetscInt *idx; 130 IS row,col; 131 MatFactorInfo info; 132 Vec left = a->left,right = a->right; 133 Mat *diag; 134 135 PetscFunctionBegin; 136 its = its*lits; 137 PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 138 PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 139 PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0"); 140 PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift"); 141 142 if (!a->diags) { 143 PetscCall(PetscMalloc1(mbs,&a->diags)); 144 PetscCall(MatFactorInfoInitialize(&info)); 145 for (i=0; i<mbs; i++) { 146 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col)); 147 PetscCall(MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info)); 148 PetscCall(MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info)); 149 PetscCall(ISDestroy(&row)); 150 PetscCall(ISDestroy(&col)); 151 } 152 } 153 diag = a->diags; 154 155 PetscCall(VecSet(xx,0.0)); 156 PetscCall(VecGetArray(xx,&x)); 157 PetscCall(VecGetArrayRead(bb,&b)); 158 159 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */ 160 while (its--) { 161 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 162 163 for (i=0; i<mbs; i++) { 164 n = a->i[i+1] - a->i[i]; 165 idx = a->j + a->i[i]; 166 v = a->a + 
a->i[i]; 167 168 PetscCall(VecSet(left,0.0)); 169 for (j=0; j<n; j++) { 170 if (idx[j] != i) { 171 PetscCall(VecPlaceArray(right,x + idx[j]*bs)); 172 PetscCall(MatMultAdd(v[j],right,left,left)); 173 PetscCall(VecResetArray(right)); 174 } 175 } 176 PetscCall(VecPlaceArray(right,b + i*bs)); 177 PetscCall(VecAYPX(left,-1.0,right)); 178 PetscCall(VecResetArray(right)); 179 180 PetscCall(VecPlaceArray(right,x + i*bs)); 181 PetscCall(MatSolve(diag[i],left,right)); 182 PetscCall(VecResetArray(right)); 183 } 184 } 185 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 186 187 for (i=mbs-1; i>=0; i--) { 188 n = a->i[i+1] - a->i[i]; 189 idx = a->j + a->i[i]; 190 v = a->a + a->i[i]; 191 192 PetscCall(VecSet(left,0.0)); 193 for (j=0; j<n; j++) { 194 if (idx[j] != i) { 195 PetscCall(VecPlaceArray(right,x + idx[j]*bs)); 196 PetscCall(MatMultAdd(v[j],right,left,left)); 197 PetscCall(VecResetArray(right)); 198 } 199 } 200 PetscCall(VecPlaceArray(right,b + i*bs)); 201 PetscCall(VecAYPX(left,-1.0,right)); 202 PetscCall(VecResetArray(right)); 203 204 PetscCall(VecPlaceArray(right,x + i*bs)); 205 PetscCall(MatSolve(diag[i],left,right)); 206 PetscCall(VecResetArray(right)); 207 208 } 209 } 210 } 211 PetscCall(VecRestoreArray(xx,&x)); 212 PetscCall(VecRestoreArrayRead(bb,&b)); 213 PetscFunctionReturn(0); 214 } 215 216 static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 217 { 218 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 219 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 220 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 221 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 222 PetscInt ridx,cidx; 223 PetscBool roworiented=a->roworiented; 224 MatScalar value; 225 Mat *ap,*aa = a->a; 226 227 PetscFunctionBegin; 228 for (k=0; k<m; k++) { /* loop over added rows */ 229 row = im[k]; 230 brow = row/bs; 231 if (row < 0) continue; 232 PetscCheck(row < 
A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
    rp   = aj + ai[brow];   /* block-column indices of block row brow */
    ap   = aa + ai[brow];   /* Mat pointers of block row brow */
    rmax = imax[brow];
    nrow = ailen[brow];
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue; /* negative columns are silently ignored */
      PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
      col = in[l]; bcol = col/bs;
      if (A->symmetric && brow > bcol) continue; /* symmetric storage keeps only the upper triangle */
      ridx = row % bs; cidx = col % bs;          /* position inside the bs x bs block */
      if (roworiented) value = v[l + k*n];
      else value = v[k + l*m];

      /* narrow the search window, then binary-search for block column bcol */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else low = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1; /* block already present */
      }
      if (nonew == 1) goto noinsert1; /* MAT_NEW_NONZERO_LOCATIONS == FALSE: drop the entry */
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
      N = nrow++ - 1; high++;
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        ap[ii+1] = ap[ii];
      }
      if (N>=i) ap[i] = NULL; /* mark slot empty so the block is created below */
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
noinsert1:;
      if (!*(ap+i)) { /* create the bs x bs block on first touch */
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i));
      }
      PetscCall(MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is));
      low = i; /* next column of this row cannot be to the left of this one */
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(0);
}

/*
   Loads a MATSEQAIJ matrix from a binary viewer and converts it to block form.
   Block size comes from -matload_block_size; -matload_symmetric stores only the
   upper triangle.  First pass below counts the nonzero blocks per block row.
*/
static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
{
  PetscErrorCode    ierr;
  Mat               tmpA;
  PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
  const PetscInt    *cols;
  const PetscScalar *values;
  PetscBool         flg = PETSC_FALSE,notdone;
  Mat_SeqAIJ        *a;
  Mat_BlockMat      *amat;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(MatCreate(PETSC_COMM_SELF,&tmpA));
  PetscCall(MatSetType(tmpA,MATSEQAIJ));
  PetscCall(MatLoad_SeqAIJ(tmpA,viewer));

  PetscCall(MatGetLocalSize(tmpA,&m,&n));
  ierr = PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");PetscCall(ierr);
  PetscCall(PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL));
  PetscCall(PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL));
  ierr = PetscOptionsEnd();PetscCall(ierr);

  /* Determine number of nonzero blocks for each block row */
  a   = (Mat_SeqAIJ*) tmpA->data;
  mbs = m/bs;
  PetscCall(PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens));
  PetscCall(PetscArrayzero(lens,mbs));

  for (i=0; i<mbs; i++) {
    /* ii[j]/ilens[j] walk the remaining entries of scalar row i*bs+j */
    for (j=0; j<bs; j++) {
      ii[j]    = a->j + a->i[i*bs + j];
      ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
    }

    currentcol = -1;
    while (PETSC_TRUE) { /* merge-scan the bs scalar rows for the next nonzero block column */
      notdone = PETSC_FALSE;
      nextcol = 1000000000; /* sentinel larger than any realistic block column */
      for (j=0; j<bs; j++) {
        while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* consume entries of the current block column */
          ii[j]++;
          ilens[j]--;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol,ii[j][0]/bs);
        }
      }
      if (!notdone) break;
      if (!flg || (nextcol >= i)) lens[i]++; /* symmetric case: count only upper-triangle blocks */
      currentcol = nextcol;
    }
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
    PetscCall(MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatBlockMatSetPreallocation(newmat,bs,0,lens));
  if (flg) {
    PetscCall(MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE));
  }
  amat =
(Mat_BlockMat*)(newmat)->data; 349 350 /* preallocate the submatrices */ 351 PetscCall(PetscMalloc1(bs,&llens)); 352 for (i=0; i<mbs; i++) { /* loops for block rows */ 353 for (j=0; j<bs; j++) { 354 ii[j] = a->j + a->i[i*bs + j]; 355 ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j]; 356 } 357 358 currentcol = 1000000000; 359 for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */ 360 if (ilens[j] > 0) { 361 currentcol = PetscMin(currentcol,ii[j][0]/bs); 362 } 363 } 364 365 while (PETSC_TRUE) { /* loops over blocks in block row */ 366 notdone = PETSC_FALSE; 367 nextcol = 1000000000; 368 PetscCall(PetscArrayzero(llens,bs)); 369 for (j=0; j<bs; j++) { /* loop over rows in block */ 370 while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */ 371 ii[j]++; 372 ilens[j]--; 373 llens[j]++; 374 } 375 if (ilens[j] > 0) { 376 notdone = PETSC_TRUE; 377 nextcol = PetscMin(nextcol,ii[j][0]/bs); 378 } 379 } 380 PetscCheckFalse(cnt >= amat->maxnz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %" PetscInt_FMT,cnt); 381 if (!flg || currentcol >= i) { 382 amat->j[cnt] = currentcol; 383 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++)); 384 } 385 386 if (!notdone) break; 387 currentcol = nextcol; 388 } 389 amat->ilen[i] = lens[i]; 390 } 391 392 PetscCall(PetscFree3(lens,ii,ilens)); 393 PetscCall(PetscFree(llens)); 394 395 /* copy over the matrix, one row at a time */ 396 for (i=0; i<m; i++) { 397 PetscCall(MatGetRow(tmpA,i,&ncols,&cols,&values)); 398 PetscCall(MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES)); 399 PetscCall(MatRestoreRow(tmpA,i,&ncols,&cols,&values)); 400 } 401 PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY)); 402 PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY)); 403 PetscFunctionReturn(0); 404 } 405 406 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer) 407 { 408 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 409 const char *name; 410 
PetscViewerFormat format; 411 412 PetscFunctionBegin; 413 PetscCall(PetscObjectGetName((PetscObject)A,&name)); 414 PetscCall(PetscViewerGetFormat(viewer,&format)); 415 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 416 PetscCall(PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz)); 417 if (A->symmetric) { 418 PetscCall(PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n")); 419 } 420 } 421 PetscFunctionReturn(0); 422 } 423 424 static PetscErrorCode MatDestroy_BlockMat(Mat mat) 425 { 426 Mat_BlockMat *bmat = (Mat_BlockMat*)mat->data; 427 PetscInt i; 428 429 PetscFunctionBegin; 430 PetscCall(VecDestroy(&bmat->right)); 431 PetscCall(VecDestroy(&bmat->left)); 432 PetscCall(VecDestroy(&bmat->middle)); 433 PetscCall(VecDestroy(&bmat->workb)); 434 if (bmat->diags) { 435 for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) { 436 PetscCall(MatDestroy(&bmat->diags[i])); 437 } 438 } 439 if (bmat->a) { 440 for (i=0; i<bmat->nz; i++) { 441 PetscCall(MatDestroy(&bmat->a[i])); 442 } 443 } 444 PetscCall(MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i)); 445 PetscCall(PetscFree(mat->data)); 446 PetscFunctionReturn(0); 447 } 448 449 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y) 450 { 451 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 452 PetscScalar *xx,*yy; 453 PetscInt *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j; 454 Mat *aa; 455 456 PetscFunctionBegin; 457 /* 458 Standard CSR multiply except each entry is a Mat 459 */ 460 PetscCall(VecGetArray(x,&xx)); 461 462 PetscCall(VecSet(y,0.0)); 463 PetscCall(VecGetArray(y,&yy)); 464 aj = bmat->j; 465 aa = bmat->a; 466 ii = bmat->i; 467 for (i=0; i<m; i++) { 468 jrow = ii[i]; 469 PetscCall(VecPlaceArray(bmat->left,yy + bs*i)); 470 n = ii[i+1] - jrow; 471 for (j=0; j<n; j++) { 472 PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow])); 473 
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
  }
  PetscCall(VecRestoreArray(x,&xx));
  PetscCall(VecRestoreArray(y,&yy));
  PetscFunctionReturn(0);
}

/*
   y = A*x when only the upper triangular part of the symmetric matrix is stored:
   each off-diagonal block A_ij contributes A_ij*x_j to y_i (upper part) and
   A_ij^T*x_i to y_j (implicit lower part).
*/
PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat*)A->data;
  PetscScalar  *xx,*yy;
  PetscInt     *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x,&xx));

  PetscCall(VecSet(y,0.0));
  PetscCall(VecGetArray(y,&yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    PetscCall(VecPlaceArray(bmat->left,yy + bs*i));   /* left aliases y_i */
    PetscCall(VecPlaceArray(bmat->middle,xx + bs*i)); /* middle aliases x_i, used for the transpose part */
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) { /* diagonal block contributes only once */
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j=0; j<n; j++) {
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow])); /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right,yy + bs*aj[jrow])); /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x,&xx));
  PetscCall(VecRestoreArray(y,&yy));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec
z)
{
  PetscFunctionBegin;
  /* NOTE(review): unimplemented stub — returns success without computing z; callers get no result */
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  /* NOTE(review): unimplemented stub — returns success without computing y */
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  /* NOTE(review): unimplemented stub — returns success without computing z */
  PetscFunctionReturn(0);
}

/*
     Adds diagonal pointers to sparse matrix structure.
*/
static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
{
  Mat_BlockMat *a = (Mat_BlockMat*)A->data;
  PetscInt     i,j,mbs = A->rmap->n/A->rmap->bs;

  PetscFunctionBegin;
  if (!a->diag) {
    PetscCall(PetscMalloc1(mbs,&a->diag));
  }
  for (i=0; i<mbs; i++) {
    a->diag[i] = a->i[i+1]; /* default: one past row end when the row has no diagonal block */
    for (j=a->i[i]; j<a->i[i+1]; j++) {
      if (a->j[j] == i) {
        a->diag[i] = j;
        break;
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Extracts a scalar matrix by taking entry (first,first) of every block.
   Supports only identical row/column index sets that are strided with
   step equal to the block size (i.e. one scalar entry per block).
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_BlockMat *a = (Mat_BlockMat*)A->data;
  Mat_SeqAIJ   *c;
  PetscInt     i,k,first,step,lensi,nrows,ncols;
  PetscInt     *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
  PetscScalar  *a_new;
  Mat          C,*aa = a->a;
  PetscBool    stride,equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow,iscol,&equal));
  PetscCheck(equal,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride));
  PetscCheck(stride,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol,&first,&step));
  PetscCheckFalse(step != A->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow,&nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols,n_rows;
    C = *B;
    PetscCall(MatGetSize(C,&n_rows,&n_cols));
    PetscCheckFalse(n_rows != nrows || n_cols
!= ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
    PetscCall(MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE));
    if (A->symmetric) {
      PetscCall(MatSetType(C,MATSEQSBAIJ));
    } else {
      PetscCall(MatSetType(C,MATSEQAIJ));
    }
    /* only the call matching C's type takes effect; the other is a no-op */
    PetscCall(MatSeqAIJSetPreallocation(C,0,ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C,1,0,ailen));
  }
  c = (Mat_SeqAIJ*)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i=0; i<nrows; i++) {
    lensi = ailen[i];
    for (k=0; k<lensi; k++) {
      *j_new++ = *aj++;
      PetscCall(MatGetValue(*aa++,first,first,a_new++)); /* scalar entry (first,first) of each block */
    }
    i_new[i+1] = i_new[i] + lensi;
    c->ilen[i] = lensi;
  }

  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  *B = C;
  PetscFunctionReturn(0);
}

/*
   Compresses out the unused preallocated slots in each block row and
   assembles every stored block; mirrors MatAssemblyEnd_SeqAIJ.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
{
  Mat_BlockMat *a = (Mat_BlockMat*)A->data;
  PetscInt     fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
  PetscInt     m = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
  Mat          *aa = a->a,*ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i=1; i<m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i-1] - ailen[i-1];
    rmax    = PetscMax(rmax,ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j=0; j<N; j++) {
        ip[j-fshift] = ip[j];
        ap[j-fshift] = ap[j];
      }
    }
    ai[i] = ai[i-1] + ailen[i-1];
  }
  if (m) {
    fshift += imax[m-1] - ailen[m-1];
    ai[m]   = ai[m-1] + ailen[m-1];
  }
  /* reset ilen and imax for each row */
  for
(i=0; i<m; i++) {
    ailen[i] = imax[i] = ai[i+1] - ai[i];
  }
  a->nz = ai[m];
  for (i=0; i<a->nz; i++) { /* finalize every stored block */
    PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
    PetscCall(MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz));
  PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs));
  PetscCall(PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax));

  A->info.mallocs    += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}

/*
   MAT_SYMMETRIC (with flg true) switches the SOR and MatMult kernels to the
   symmetric (upper-triangle-only) variants; every other option is ignored.
*/
static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
{
  PetscFunctionBegin;
  if (opt == MAT_SYMMETRIC && flg) {
    A->ops->sor  = MatSOR_BlockMat_Symmetric;
    A->ops->mult = MatMult_BlockMat_Symmetric;
  } else {
    PetscCall(PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]));
  }
  PetscFunctionReturn(0);
}

/* Operation table for MATBLOCKMAT; NULL slots are unimplemented operations */
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                               /* 4*/  MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                               /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL
};

/*@C
   MatBlockMatSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameter nz
   (or the array nnz).  By setting these parameters accurately, performance
   during matrix assembly can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - The matrix
.  bs - size of each block in matrix
.
nz - number of nonzeros per block row (same for all rows)
-  nnz - array containing the number of nonzeros in the various block rows
         (possibly different for each row) or NULL

   Notes:
     If nnz is given then nz is ignored

     Specify the preallocated storage with either nz or nnz (not both).
     Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
     allocation.  For large problems you MUST preallocate memory or you
     will get TERRIBLE performance, see the users' manual chapter on matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()

@*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
{
  PetscFunctionBegin;
  PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
  PetscFunctionReturn(0);
}

/* Type-specific implementation: sets block size, allocates the CSR skeleton and work vectors */
static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
{
  Mat_BlockMat *bmat = (Mat_BlockMat*)A->data;
  PetscInt     i;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(A->rmap,bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap,bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  PetscCall(PetscLayoutGetBlockSize(A->rmap,&bs)); /* layout may have adjusted bs */

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
  if (nnz) {
    for (i=0; i<A->rmap->n/bs; i++) {
      PetscCheckFalse(nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
      PetscCheckFalse(nnz[i] > A->cmap->n/bs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],A->cmap->n/bs);
    }
  }
  bmat->mbs = A->rmap->n/bs;

  /* bs-sized work vectors that alias portions of larger arrays during MatMult/MatSOR */
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left));

  if (!bmat->imax) {
    /* NOTE(review): sized by A->rmap->n although only mbs entries are used — over-allocation; verify intent */
    PetscCall(PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen));
    PetscCall(PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt)));
  }
  if (PetscLikely(nnz)) {
    nz = 0; /* recompute total number of blocks from the per-row counts */
    for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz           += nnz[i];
    }
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen,bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
  PetscCall(PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i));
  PetscCall(PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt))));
  bmat->i[0] = 0;
  for (i=1; i<bmat->mbs+1; i++) {
    bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
  }
  bmat->singlemalloc = PETSC_TRUE;
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*MC
   MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix
                 consisting of (usually) sparse blocks.

  Level: advanced

.seealso: MatCreateBlockMat()

M*/

PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
{
  Mat_BlockMat *b;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(A,&b));
  A->data = (void*)b;
  PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));

  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_FALSE;
  PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT));

  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of rows
.  n  - number of columns
.  bs - size of each submatrix
.  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
-  nnz - expected number of nonzers per block row if known (use NULL otherwise)

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
    Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

    For matrices containing parallel submatrices and variable block sizes, see MATNEST.

.seealso: MATBLOCKMAT, MatCreateNest()
@*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(*A,MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A,bs,nz,nnz));
  PetscFunctionReturn(0);
}