1 #define PETSCMAT_DLL 2 3 /* 4 This provides a matrix that consists of Mats 5 */ 6 7 #include "src/mat/matimpl.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" /* use the common AIJ data-structure */ 9 #include "petscksp.h" 10 11 #define CHUNKSIZE 15 12 13 typedef struct { 14 SEQAIJHEADER(Mat); 15 SEQBAIJHEADER; 16 Mat *diags; 17 18 Vec left,right,middle,workb; /* dummy vectors to perform local parts of product */ 19 } Mat_BlockMat; 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatRelax_BlockMat_Symmetric" 23 PetscErrorCode MatRelax_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 24 { 25 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 26 PetscScalar *x; 27 const Mat *v = a->a; 28 const PetscScalar *b; 29 PetscErrorCode ierr; 30 PetscInt n = A->cmap.n,i,mbs = n/A->rmap.bs,j,bs = A->rmap.bs; 31 const PetscInt *idx; 32 IS row,col; 33 MatFactorInfo info; 34 Vec left = a->left,right = a->right, middle = a->middle; 35 Mat *diag; 36 37 PetscFunctionBegin; 38 its = its*lits; 39 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 40 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 41 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"No support yet for omega not equal to 1.0"); 42 if (fshift) SETERRQ(PETSC_ERR_SUP,"No support yet for fshift"); 43 if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) 44 SETERRQ(PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep"); 45 46 if (!a->diags) { 47 ierr = PetscMalloc(mbs*sizeof(Mat),&a->diags);CHKERRQ(ierr); 48 ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr); 49 for (i=0; i<mbs; i++) { 50 ierr = MatGetOrdering(a->a[a->diag[i]], MATORDERING_ND,&row,&col);CHKERRQ(ierr); 51 ierr = MatCholeskyFactorSymbolic(a->a[a->diag[i]],row,&info,a->diags+i);CHKERRQ(ierr); 52 ierr = MatCholeskyFactorNumeric(a->a[a->diag[i]],&info,a->diags+i);CHKERRQ(ierr); 53 ierr = ISDestroy(row);CHKERRQ(ierr); 54 ierr = ISDestroy(col);CHKERRQ(ierr); 55 } 56 ierr = VecDuplicate(bb,&a->workb);CHKERRQ(ierr); 57 } 58 diag = a->diags; 59 60 ierr = VecSet(xx,0.0);CHKERRQ(ierr); 61 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 62 /* copy right hand side because it must be modified during iteration */ 63 ierr = VecCopy(bb,a->workb);CHKERRQ(ierr); 64 ierr = VecGetArray(a->workb,(PetscScalar**)&b);CHKERRQ(ierr); 65 66 /* need to add code for when initial guess is zero, see MatRelax_SeqAIJ */ 67 while (its--) { 68 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 69 70 for (i=0; i<mbs; i++) { 71 n = a->i[i+1] - a->i[i] - 1; 72 idx = a->j + a->i[i] + 1; 73 v = a->a + a->i[i] + 1; 74 75 ierr = VecSet(left,0.0);CHKERRQ(ierr); 76 for (j=0; j<n; j++) { 77 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 78 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 79 ierr = VecResetArray(right);CHKERRQ(ierr); 80 } 81 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 82 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 83 ierr = VecResetArray(right);CHKERRQ(ierr); 84 85 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 86 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 87 88 /* now adjust right hand side, see MatRelax_SeqSBAIJ */ 89 for (j=0; j<n; j++) { 90 ierr = MatMultTranspose(v[j],right,left);CHKERRQ(ierr); 91 ierr = VecPlaceArray(middle,b + idx[j]*bs);CHKERRQ(ierr); 92 ierr = VecAXPY(middle,-1.0,left);CHKERRQ(ierr); 93 ierr = VecResetArray(middle);CHKERRQ(ierr); 94 } 95 ierr = VecResetArray(right);CHKERRQ(ierr); 96 97 } 98 } 99 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 100 101 for (i=mbs-1; i>=0; i--) { 102 n = a->i[i+1] - a->i[i] - 1; 103 idx = a->j + a->i[i] + 1; 104 v = a->a + a->i[i] + 1; 105 106 ierr = VecSet(left,0.0);CHKERRQ(ierr); 107 for (j=0; j<n; j++) { 108 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 109 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 110 ierr = VecResetArray(right);CHKERRQ(ierr); 111 } 112 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 113 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 114 ierr = VecResetArray(right);CHKERRQ(ierr); 115 116 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 117 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 118 ierr = VecResetArray(right);CHKERRQ(ierr); 119 120 } 121 } 122 } 123 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 124 ierr = VecRestoreArray(a->workb,(PetscScalar**)&b);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatRelax_BlockMat" 130 PetscErrorCode MatRelax_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 131 { 132 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 133 PetscScalar *x; 134 const Mat *v = a->a; 135 const PetscScalar *b; 136 PetscErrorCode ierr; 137 PetscInt n = A->cmap.n,i,mbs = n/A->rmap.bs,j,bs = A->rmap.bs; 138 const PetscInt *idx; 139 IS row,col; 140 MatFactorInfo info; 141 Vec left = a->left,right = a->right; 142 Mat *diag; 143 144 PetscFunctionBegin; 145 its = its*lits; 146 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 147 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 148 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"No support yet for omega not equal to 1.0"); 149 if (fshift) SETERRQ(PETSC_ERR_SUP,"No support yet for fshift"); 150 151 if (!a->diags) { 152 ierr = PetscMalloc(mbs*sizeof(Mat),&a->diags);CHKERRQ(ierr); 153 ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr); 154 for (i=0; i<mbs; i++) { 155 ierr = MatGetOrdering(a->a[a->diag[i]], MATORDERING_ND,&row,&col);CHKERRQ(ierr); 156 ierr = MatLUFactorSymbolic(a->a[a->diag[i]],row,col,&info,a->diags+i);CHKERRQ(ierr); 157 ierr = MatLUFactorNumeric(a->a[a->diag[i]],&info,a->diags+i);CHKERRQ(ierr); 158 ierr = ISDestroy(row);CHKERRQ(ierr); 159 ierr = ISDestroy(col);CHKERRQ(ierr); 160 } 161 } 162 diag = a->diags; 163 164 ierr = VecSet(xx,0.0);CHKERRQ(ierr); 165 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 166 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 167 168 /* need to add code for when initial guess is zero, see MatRelax_SeqAIJ */ 169 while (its--) { 170 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 171 172 for (i=0; i<mbs; i++) { 173 n = a->i[i+1] - a->i[i]; 174 idx = a->j + a->i[i]; 175 v = a->a + a->i[i]; 176 177 ierr = VecSet(left,0.0);CHKERRQ(ierr); 178 for (j=0; j<n; j++) { 179 if (idx[j] != i) { 180 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 181 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 182 ierr = VecResetArray(right);CHKERRQ(ierr); 183 } 184 } 185 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 186 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 187 ierr = VecResetArray(right);CHKERRQ(ierr); 188 189 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 190 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 191 ierr = VecResetArray(right);CHKERRQ(ierr); 192 } 193 } 194 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 195 196 for (i=mbs-1; i>=0; i--) { 197 n = a->i[i+1] - a->i[i]; 198 idx = a->j + a->i[i]; 199 v = a->a + a->i[i]; 200 201 ierr = VecSet(left,0.0);CHKERRQ(ierr); 202 for (j=0; j<n; j++) { 203 if (idx[j] != i) { 204 ierr = VecPlaceArray(right,x + idx[j]*bs);CHKERRQ(ierr); 205 ierr = MatMultAdd(v[j],right,left,left);CHKERRQ(ierr); 206 ierr = VecResetArray(right);CHKERRQ(ierr); 207 } 208 } 209 ierr = VecPlaceArray(right,b + i*bs);CHKERRQ(ierr); 210 ierr = VecAYPX(left,-1.0,right);CHKERRQ(ierr); 211 ierr = VecResetArray(right);CHKERRQ(ierr); 212 213 ierr = VecPlaceArray(right,x + i*bs);CHKERRQ(ierr); 214 ierr = MatSolve(diag[i],left,right);CHKERRQ(ierr); 215 ierr = VecResetArray(right);CHKERRQ(ierr); 216 217 } 218 } 219 } 220 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 221 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatSetValues_BlockMat" 227 PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 228 { 229 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 230 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 231 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 232 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 233 PetscErrorCode ierr; 234 PetscInt ridx,cidx; 235 PetscTruth roworiented=a->roworiented; 236 MatScalar value; 237 Mat *ap,*aa = a->a; 238 239 PetscFunctionBegin; 240 for (k=0; k<m; k++) { /* loop over added rows */ 241 row = im[k]; 242 brow = row/bs; 243 if (row < 0) continue; 244 #if defined(PETSC_USE_DEBUG) 245 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 246 #endif 247 rp = aj + ai[brow]; 248 ap = aa + ai[brow]; 249 rmax = imax[brow]; 250 nrow = ailen[brow]; 251 low = 0; 252 high = nrow; 253 for (l=0; l<n; l++) { /* loop over added columns */ 254 if (in[l] < 0) continue; 255 #if defined(PETSC_USE_DEBUG) 256 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 257 #endif 258 col = in[l]; bcol = col/bs; 259 if (A->symmetric && brow > bcol) continue; 260 ridx = row % bs; cidx = col % bs; 261 if (roworiented) { 262 value = v[l + k*n]; 263 } else { 264 value = v[k + l*m]; 265 } 266 if (col <= lastcol) low = 0; else high = nrow; 267 lastcol = col; 268 while (high-low > 7) { 269 t = (low+high)/2; 270 if (rp[t] > bcol) high = t; 271 else low = t; 272 } 273 for (i=low; i<high; i++) { 274 if (rp[i] > bcol) break; 275 if (rp[i] == bcol) { 276 /* printf("row %d col %d found i %d\n",brow,bcol,i);*/ 277 goto noinsert1; 278 } 279 } 280 if (nonew == 1) goto noinsert1; 281 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 282 MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat); 283 N = nrow++ - 1; high++; 284 /* shift up all the later entries in this row */ 285 /* printf("N %d i %d\n",N,i);*/ 286 for (ii=N; ii>=i; ii--) { 287 rp[ii+1] = rp[ii]; 288 ap[ii+1] = ap[ii]; 289 } 290 if (N>=i) ap[i] = 0; 291 rp[i] = bcol; 292 a->nz++; 293 noinsert1:; 294 if (!*(ap+i)) { 295 /* printf("create matrix at i %d rw %d col %d\n",i,brow,bcol);*/ 296 if (A->symmetric && brow == bcol) { 297 /* don't use SBAIJ since want to reorder in sparse factorization */ 298 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,0,ap+i);CHKERRQ(ierr); 299 } else { 300 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,0,ap+i);CHKERRQ(ierr); 301 } 302 } 303 /* printf("numerical value at i %d row %d col %d cidx %d ridx %d value %g\n",i,brow,bcol,cidx,ridx,value);*/ 304 ierr = MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);CHKERRQ(ierr); 305 low = i; 306 } 307 /* printf("nrow for row %d %d\n",nrow,brow);*/ 308 ailen[brow] = nrow; 309 } 310 A->same_nonzero = PETSC_FALSE; 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatLoad_BlockMat" 316 PetscErrorCode MatLoad_BlockMat(PetscViewer viewer, MatType type,Mat *A) 317 { 318 PetscErrorCode ierr; 319 Mat tmpA; 320 PetscInt i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol; 321 const PetscInt *cols; 322 const PetscScalar *values; 323 PetscTruth flg,notdone; 324 Mat_SeqAIJ *a; 325 326 PetscFunctionBegin; 327 ierr = MatLoad_SeqAIJ(viewer,MATSEQAIJ,&tmpA);CHKERRQ(ierr); 328 329 ierr = MatGetLocalSize(tmpA,&m,&n);CHKERRQ(ierr); 330 ierr = PetscOptionsBegin(PETSC_COMM_SELF,PETSC_NULL,"Options for loading BlockMat matrix 1","Mat");CHKERRQ(ierr); 331 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 332 ierr = PetscOptionsName("-matload_symmetric","Store the matrix as symmetric","MatLoad",&flg);CHKERRQ(ierr); 333 ierr = PetscOptionsEnd();CHKERRQ(ierr); 334 335 a = (Mat_SeqAIJ*) tmpA->data; 336 mbs = m/bs; 337 ierr = PetscMalloc3(mbs,PetscInt,&lens,bs,PetscInt,&ii,bs,PetscInt,&ilens);CHKERRQ(ierr); 338 ierr = PetscMemzero(lens,mbs*sizeof(PetscInt));CHKERRQ(ierr); 339 340 for (i=0; i<mbs; i++) { 341 for (j=0; j<bs; j++) { 342 currentcol = -1; 343 CHKMEMQ; 344 ii[j] = a->j + a->i[i*bs + j]; 345 CHKMEMQ; 346 ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j]; 347 CHKMEMQ; 348 /* printf("j %d length %d\n",j,ilens[j]);*/ 349 } 350 notdone = PETSC_TRUE; 351 while (PETSC_TRUE) { 352 notdone = PETSC_FALSE; 353 nextcol = 1000000000; 354 for (j=0; j<bs; j++) { 355 /* printf("loop j %d length %d\n",j,ilens[j]); */ 356 while ((ii[j][0]/bs <= currentcol && ilens[j] > 0) || (flg && ii[j][0]/bs < i)) { 357 ii[j]++; 358 CHKMEMQ; 359 ilens[j]--; 360 CHKMEMQ; 361 } 362 if (ilens[j] > 0) { 363 notdone = PETSC_TRUE; 364 nextcol = PetscMin(nextcol,ii[j][0]/bs); 365 } 366 } 367 if (!notdone) break; 368 currentcol = nextcol; 369 CHKMEMQ; 370 lens[i]++; 371 CHKMEMQ; 372 } 373 /* printf("len of i %d %d\n",i,lens[i]);*/ 374 } 375 CHKMEMQ; 376 377 ierr = MatCreateBlockMat(PETSC_COMM_SELF,m,n,bs,0,lens,A);CHKERRQ(ierr); 378 ierr = PetscFree3(lens,ii,ilens);CHKERRQ(ierr); 379 if (flg) { 380 ierr = MatSetOption(*A,MAT_SYMMETRIC);CHKERRQ(ierr); 381 } 382 for (i=0; i<m; i++) { 383 ierr = MatGetRow(tmpA,i,&ncols,&cols,&values);CHKERRQ(ierr); 384 ierr = MatSetValues(*A,1,&i,ncols,cols,values,INSERT_VALUES);CHKERRQ(ierr); 385 ierr = MatRestoreRow(tmpA,i,&ncols,&cols,&values);CHKERRQ(ierr); 386 } 387 ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 388 ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 389 PetscFunctionReturn(0); 390 } 391 392 #undef __FUNCT__ 393 #define __FUNCT__ "MatView_BlockMat" 394 PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer) 395 { 396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 397 PetscErrorCode ierr; 398 const char *name; 399 PetscViewerFormat format; 400 401 PetscFunctionBegin; 402 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 403 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 404 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 405 ierr = PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %D \n",a->nz);CHKERRQ(ierr); 406 } 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatDestroy_BlockMat" 412 PetscErrorCode MatDestroy_BlockMat(Mat mat) 413 { 414 PetscErrorCode ierr; 415 Mat_BlockMat *bmat = (Mat_BlockMat*)mat->data; 416 PetscInt i; 417 418 PetscFunctionBegin; 419 if (bmat->right) { 420 ierr = VecDestroy(bmat->right);CHKERRQ(ierr); 421 } 422 if (bmat->left) { 423 ierr = VecDestroy(bmat->left);CHKERRQ(ierr); 424 } 425 if (bmat->middle) { 426 ierr = VecDestroy(bmat->middle);CHKERRQ(ierr); 427 } 428 if (bmat->workb) { 429 ierr = VecDestroy(bmat->workb);CHKERRQ(ierr); 430 } 431 if (bmat->diags) { 432 for (i=0; i<mat->rmap.n/mat->rmap.bs; i++) { 433 if (bmat->diags[i]) {ierr = MatDestroy(bmat->diags[i]);CHKERRQ(ierr);} 434 } 435 } 436 if (bmat->a) { 437 for (i=0; i<bmat->nz; i++) { 438 if (bmat->a[i]) {ierr = MatDestroy(bmat->a[i]);CHKERRQ(ierr);} 439 } 440 } 441 ierr = MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);CHKERRQ(ierr); 442 ierr = PetscFree(bmat);CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatMult_BlockMat" 448 PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y) 449 { 450 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 451 PetscErrorCode ierr; 452 PetscScalar *xx,*yy; 453 PetscInt *aj,i,*ii,jrow,m = A->rmap.n/A->rmap.bs,bs = A->rmap.bs,n,j; 454 Mat *aa; 455 456 PetscFunctionBegin; 457 CHKMEMQ; 458 /* 459 Standard CSR multiply except each entry is a Mat 460 */ 461 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 462 463 ierr = VecSet(y,0.0);CHKERRQ(ierr); 464 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 465 aj = bmat->j; 466 aa = bmat->a; 467 ii = bmat->i; 468 for (i=0; i<m; i++) { 469 jrow = ii[i]; 470 ierr = VecPlaceArray(bmat->left,yy + bs*i);CHKERRQ(ierr); 471 n = ii[i+1] - jrow; 472 for (j=0; j<n; j++) { 473 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); 474 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 475 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 476 jrow++; 477 } 478 ierr = VecResetArray(bmat->left);CHKERRQ(ierr); 479 } 480 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 481 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 482 CHKMEMQ; 483 PetscFunctionReturn(0); 484 } 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatMult_BlockMat_Symmetric" 488 PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y) 489 { 490 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 491 PetscErrorCode ierr; 492 PetscScalar *xx,*yy; 493 PetscInt *aj,i,*ii,jrow,m = A->rmap.n/A->rmap.bs,bs = A->rmap.bs,n,j; 494 Mat *aa; 495 496 PetscFunctionBegin; 497 CHKMEMQ; 498 /* 499 Standard CSR multiply except each entry is a Mat 500 */ 501 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 502 503 ierr = VecSet(y,0.0);CHKERRQ(ierr); 504 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 505 aj = bmat->j; 506 aa = bmat->a; 507 ii = bmat->i; 508 for (i=0; i<m; i++) { 509 jrow = ii[i]; 510 n = ii[i+1] - jrow; 511 ierr = VecPlaceArray(bmat->left,yy + bs*i);CHKERRQ(ierr); 512 ierr = VecPlaceArray(bmat->middle,xx + bs*i);CHKERRQ(ierr); 513 /* if we ALWAYS required a diagonal entry then could remove this if test */ 514 if (aj[jrow] == i) { 515 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); 516 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 517 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 518 jrow++; 519 n--; 520 } 521 for (j=0; j<n; j++) { 522 ierr = VecPlaceArray(bmat->right,xx + bs*aj[jrow]);CHKERRQ(ierr); /* upper triangular part */ 523 ierr = MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);CHKERRQ(ierr); 524 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 525 526 ierr = VecPlaceArray(bmat->right,yy + bs*aj[jrow]);CHKERRQ(ierr); /* lower triangular part */ 527 ierr = MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);CHKERRQ(ierr); 528 ierr = VecResetArray(bmat->right);CHKERRQ(ierr); 529 jrow++; 530 } 531 ierr = VecResetArray(bmat->left);CHKERRQ(ierr); 532 ierr = VecResetArray(bmat->middle);CHKERRQ(ierr); 533 } 534 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 535 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 536 CHKMEMQ; 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMultAdd_BlockMat" 542 PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z) 543 { 544 PetscFunctionBegin; 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatMultTranspose_BlockMat" 550 PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y) 551 { 552 PetscFunctionBegin; 553 PetscFunctionReturn(0); 554 } 555 556 #undef __FUNCT__ 557 #define __FUNCT__ "MatMultTransposeAdd_BlockMat" 558 PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z) 559 { 560 PetscFunctionBegin; 561 PetscFunctionReturn(0); 562 } 563 564 /* 565 Adds diagonal pointers to sparse matrix structure. 566 */ 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatMarkDiagonal_BlockMat" 569 PetscErrorCode MatMarkDiagonal_BlockMat(Mat A) 570 { 571 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 572 PetscErrorCode ierr; 573 PetscInt i,j,mbs = A->rmap.n/A->rmap.bs; 574 575 PetscFunctionBegin; 576 if (!a->diag) { 577 ierr = PetscMalloc(mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 578 } 579 for (i=0; i<mbs; i++) { 580 a->diag[i] = a->i[i+1]; 581 for (j=a->i[i]; j<a->i[i+1]; j++) { 582 if (a->j[j] == i) { 583 a->diag[i] = j; 584 break; 585 } 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatGetSubMatrix_BlockMat" 593 PetscErrorCode MatGetSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,PetscInt csize,MatReuse scall,Mat *B) 594 { 595 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 596 Mat_SeqAIJ *c; 597 PetscErrorCode ierr; 598 PetscInt i,k,first,step,lensi,nrows,ncols; 599 PetscInt *j_new,*i_new,*aj = a->j,*ai = a->i,ii,*ailen = a->ilen; 600 PetscScalar *a_new,value; 601 Mat C,*aa = a->a; 602 PetscTruth stride,equal; 603 604 PetscFunctionBegin; 605 ierr = ISEqual(isrow,iscol,&equal);CHKERRQ(ierr); 606 if (!equal) SETERRQ(PETSC_ERR_SUP,"Only for idential column and row indices"); 607 ierr = ISStride(iscol,&stride);CHKERRQ(ierr); 608 if (!stride) SETERRQ(PETSC_ERR_SUP,"Only for stride indices"); 609 ierr = ISStrideGetInfo(iscol,&first,&step);CHKERRQ(ierr); 610 if (step != A->rmap.bs) SETERRQ(PETSC_ERR_SUP,"Can only select one entry from each block"); 611 612 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 613 ncols = nrows; 614 615 /* create submatrix */ 616 if (scall == MAT_REUSE_MATRIX) { 617 PetscInt n_cols,n_rows; 618 C = *B; 619 ierr = MatGetSize(C,&n_rows,&n_cols);CHKERRQ(ierr); 620 if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); 621 ierr = MatZeroEntries(C);CHKERRQ(ierr); 622 } else { 623 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 624 ierr = MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 625 if (A->symmetric) { 626 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 627 } else { 628 ierr = MatSetType(C,MATSEQAIJ);CHKERRQ(ierr); 629 } 630 ierr = MatSeqAIJSetPreallocation(C,0,ailen);CHKERRQ(ierr); 631 ierr = MatSeqSBAIJSetPreallocation(C,1,0,ailen);CHKERRQ(ierr); 632 } 633 c = (Mat_SeqAIJ*)C->data; 634 635 /* loop over rows inserting into submatrix */ 636 a_new = c->a; 637 j_new = c->j; 638 i_new = c->i; 639 640 for (i=0; i<nrows; i++) { 641 ii = ai[i]; 642 lensi = ailen[i]; 643 for (k=0; k<lensi; k++) { 644 *j_new++ = *aj++; 645 ierr = MatGetValue(*aa++,first,first,value);CHKERRQ(ierr); 646 *a_new++ = value; 647 } 648 i_new[i+1] = i_new[i] + lensi; 649 c->ilen[i] = lensi; 650 } 651 652 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 653 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 654 *B = C; 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatAssemblyEnd_BlockMat" 660 PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode) 661 { 662 Mat_BlockMat *a = (Mat_BlockMat*)A->data; 663 PetscErrorCode ierr; 664 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 665 PetscInt m = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0; 666 Mat *aa = a->a,*ap; 667 668 PetscFunctionBegin; 669 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 670 671 if (m) rmax = ailen[0]; /* determine row with most nonzeros */ 672 for (i=1; i<m; i++) { 673 /* move each row back by the amount of empty slots (fshift) before it*/ 674 fshift += imax[i-1] - ailen[i-1]; 675 rmax = PetscMax(rmax,ailen[i]); 676 if (fshift) { 677 ip = aj + ai[i] ; 678 ap = aa + ai[i] ; 679 N = ailen[i]; 680 for (j=0; j<N; j++) { 681 ip[j-fshift] = ip[j]; 682 ap[j-fshift] = ap[j]; 683 } 684 } 685 ai[i] = ai[i-1] + ailen[i-1]; 686 } 687 if (m) { 688 fshift += imax[m-1] - ailen[m-1]; 689 ai[m] = ai[m-1] + ailen[m-1]; 690 } 691 /* reset ilen and imax for each row */ 692 for (i=0; i<m; i++) { 693 ailen[i] = imax[i] = ai[i+1] - ai[i]; 694 } 695 a->nz = ai[m]; 696 for (i=0; i<a->nz; i++) { 697 #if defined(PETSC_USE_DEBUG) 698 if (!aa[i]) SETERRQ3(PETSC_ERR_PLIB,"Null matrix at location %D column %D nz %D",i,aj[i],a->nz); 699 #endif 700 ierr = MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 701 ierr = MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 702 } 703 CHKMEMQ; 704 ierr = PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap.n/A->cmap.bs,fshift,a->nz);CHKERRQ(ierr); 705 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 706 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);CHKERRQ(ierr); 707 a->reallocs = 0; 708 A->info.nz_unneeded = (double)fshift; 709 a->rmax = rmax; 710 711 A->same_nonzero = PETSC_TRUE; 712 ierr = MatMarkDiagonal_BlockMat(A);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatSetOption_BlockMat" 718 PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt) 719 { 720 PetscFunctionBegin; 721 if (opt == MAT_SYMMETRIC) { 722 A->ops->relax = MatRelax_BlockMat_Symmetric; 723 A->ops->mult = MatMult_BlockMat_Symmetric; 724 } else { 725 PetscInfo1(A,"Unused matrix option %s\n",MatOptions[opt]); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 731 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat, 732 0, 733 0, 734 MatMult_BlockMat, 735 /* 4*/ MatMultAdd_BlockMat, 736 MatMultTranspose_BlockMat, 737 MatMultTransposeAdd_BlockMat, 738 0, 739 0, 740 0, 741 /*10*/ 0, 742 0, 743 0, 744 MatRelax_BlockMat, 745 0, 746 /*15*/ 0, 747 0, 748 0, 749 0, 750 0, 751 /*20*/ 0, 752 MatAssemblyEnd_BlockMat, 753 0, 754 MatSetOption_BlockMat, 755 0, 756 /*25*/ 0, 757 0, 758 0, 759 0, 760 0, 761 /*30*/ 0, 762 0, 763 0, 764 0, 765 0, 766 /*35*/ 0, 767 0, 768 0, 769 0, 770 0, 771 /*40*/ 0, 772 0, 773 0, 774 0, 775 0, 776 /*45*/ 0, 777 0, 778 0, 779 0, 780 0, 781 /*50*/ 0, 782 0, 783 0, 784 0, 785 0, 786 /*55*/ 0, 787 0, 788 0, 789 0, 790 0, 791 /*60*/ MatGetSubMatrix_BlockMat, 792 MatDestroy_BlockMat, 793 MatView_BlockMat, 794 0, 795 0, 796 /*65*/ 0, 797 0, 798 0, 799 0, 800 0, 801 /*70*/ 0, 802 0, 803 0, 804 0, 805 0, 806 /*75*/ 0, 807 0, 808 0, 809 0, 810 0, 811 /*80*/ 0, 812 0, 813 0, 814 0, 815 MatLoad_BlockMat, 816 /*85*/ 0, 817 0, 818 0, 819 0, 820 0, 821 /*90*/ 0, 822 0, 823 0, 824 0, 825 0, 826 /*95*/ 0, 827 0, 828 0, 829 0}; 830 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatBlockMatSetPreallocation" 833 /*@C 834 MatBlockMatSetPreallocation - For good matrix assembly performance 835 the user should preallocate the matrix storage by setting the parameter nz 836 (or the array nnz). By setting these parameters accurately, performance 837 during matrix assembly can be increased by more than a factor of 50. 838 839 Collective on MPI_Comm 840 841 Input Parameters: 842 + B - The matrix 843 . bs - size of each block in matrix 844 . nz - number of nonzeros per block row (same for all rows) 845 - nnz - array containing the number of nonzeros in the various block rows 846 (possibly different for each row) or PETSC_NULL 847 848 Notes: 849 If nnz is given then nz is ignored 850 851 Specify the preallocated storage with either nz or nnz (not both). 852 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 853 allocation. For large problems you MUST preallocate memory or you 854 will get TERRIBLE performance, see the users' manual chapter on matrices. 855 856 Level: intermediate 857 858 .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues() 859 860 @*/ 861 PetscErrorCode PETSCMAT_DLLEXPORT MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 862 { 863 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 864 865 PetscFunctionBegin; 866 ierr = PetscObjectQueryFunction((PetscObject)B,"MatBlockMatSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 867 if (f) { 868 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 EXTERN_C_BEGIN 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatBlockMatSetPreallocation_BlockMat" 876 PetscErrorCode PETSCMAT_DLLEXPORT MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz) 877 { 878 Mat_BlockMat *bmat = (Mat_BlockMat*)A->data; 879 PetscErrorCode ierr; 880 PetscInt i; 881 882 PetscFunctionBegin; 883 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Block size given %D must be great than zero",bs); 884 if (A->rmap.n % bs) SETERRQ2(PETSC_ERR_ARG_INCOMP,"Blocksize %D does not divide number of rows %D",bs,A->rmap.n); 885 if (A->cmap.n % bs) SETERRQ2(PETSC_ERR_ARG_INCOMP,"Blocksize %D does not divide number of columns %D",bs,A->cmap.n); 886 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 887 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 888 if (nnz) { 889 for (i=0; i<A->rmap.n/bs; i++) { 890 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 891 if (nnz[i] > A->cmap.n/bs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap.n/bs); 892 } 893 } 894 A->rmap.bs = A->cmap.bs = bs; 895 bmat->mbs = A->rmap.n/bs; 896 897 printf("A->rmap.n%d %d\n",A->rmap.n, bs); 898 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs,PETSC_NULL,&bmat->right);CHKERRQ(ierr); 899 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs,PETSC_NULL,&bmat->middle);CHKERRQ(ierr); 900 ierr = VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);CHKERRQ(ierr); 901 902 ierr = PetscMalloc2(A->rmap.n,PetscInt,&bmat->imax,A->rmap.n,PetscInt,&bmat->ilen);CHKERRQ(ierr); 903 if (nnz) { 904 nz = 0; 905 for (i=0; i<A->rmap.n/A->rmap.bs; i++) { 906 bmat->imax[i] = nnz[i]; 907 nz += nnz[i]; 908 } 909 } else { 910 SETERRQ(PETSC_ERR_SUP,"Currently requires block row by row preallocation"); 911 } 912 913 /* bmat->ilen will count nonzeros in each row so far. */ 914 for (i=0; i<bmat->mbs; i++) { bmat->ilen[i] = 0;} 915 916 /* allocate the matrix space */ 917 ierr = PetscMalloc3(nz,Mat,&bmat->a,nz,PetscInt,&bmat->j,A->rmap.n+1,PetscInt,&bmat->i);CHKERRQ(ierr); 918 bmat->i[0] = 0; 919 for (i=1; i<bmat->mbs+1; i++) { 920 bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1]; 921 } 922 bmat->singlemalloc = PETSC_TRUE; 923 bmat->free_a = PETSC_TRUE; 924 bmat->free_ij = PETSC_TRUE; 925 926 bmat->nz = 0; 927 bmat->maxnz = nz; 928 A->info.nz_unneeded = (double)bmat->maxnz; 929 930 PetscFunctionReturn(0); 931 } 932 EXTERN_C_END 933 934 /*MC 935 MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix 936 consisting of (usually) sparse blocks. 937 938 Level: advanced 939 940 .seealso: MatCreateBlockMat() 941 942 M*/ 943 944 EXTERN_C_BEGIN 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatCreate_BlockMat" 947 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BlockMat(Mat A) 948 { 949 Mat_BlockMat *b; 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 954 ierr = PetscNew(Mat_BlockMat,&b);CHKERRQ(ierr); 955 956 A->data = (void*)b; 957 958 ierr = PetscMapInitialize(A->comm,&A->rmap);CHKERRQ(ierr); 959 ierr = PetscMapInitialize(A->comm,&A->cmap);CHKERRQ(ierr); 960 961 A->assembled = PETSC_TRUE; 962 A->preallocated = PETSC_FALSE; 963 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);CHKERRQ(ierr); 964 965 ierr = PetscOptionsBegin(A->comm,A->prefix,"Matrix Option","Mat");CHKERRQ(ierr); 966 ierr = PetscOptionsEnd(); 967 968 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatBlockMatSetPreallocation_C", 969 "MatBlockMatSetPreallocation_BlockMat", 970 MatBlockMatSetPreallocation_BlockMat);CHKERRQ(ierr); 971 972 PetscFunctionReturn(0); 973 } 974 EXTERN_C_END 975 976 #undef __FUNCT__ 977 #define __FUNCT__ "MatCreateBlockMat" 978 /*@C 979 MatCreateBlockMat - Creates a new matrix based sparse Mat storage 980 981 Collective on MPI_Comm 982 983 Input Parameters: 984 + comm - MPI communicator 985 . m - number of rows 986 . n - number of columns 987 . bs - size of each submatrix 988 . nz - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known) 989 - nnz - expected number of nonzers per block row if known (use PETSC_NULL otherwise) 990 991 992 Output Parameter: 993 . A - the matrix 994 995 Level: intermediate 996 997 PETSc requires that matrices and vectors being used for certain 998 operations are partitioned accordingly. For example, when 999 creating a bmat matrix, A, that supports parallel matrix-vector 1000 products using MatMult(A,x,y) the user should set the number 1001 of local matrix rows to be the number of local elements of the 1002 corresponding result vector, y. Note that this is information is 1003 required for use of the matrix interface routines, even though 1004 the bmat matrix may not actually be physically partitioned. 1005 For example, 1006 1007 .keywords: matrix, bmat, create 1008 1009 .seealso: MATBLOCKMAT 1010 @*/ 1011 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A) 1012 { 1013 PetscErrorCode ierr; 1014 1015 PetscFunctionBegin; 1016 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1017 ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1018 ierr = MatSetType(*A,MATBLOCKMAT);CHKERRQ(ierr); 1019 ierr = MatBlockMatSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 1024 1025